diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 000000000..1e7d18272
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,5 @@
+root = true
+
+[*.yml]
+indent_style = space
+indent_size = 2
diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml
index 83cfd67af..e4c7ea161 100644
--- a/.github/ISSUE_TEMPLATE/bug_report.yml
+++ b/.github/ISSUE_TEMPLATE/bug_report.yml
@@ -84,7 +84,7 @@ body:
id: duplicate
attributes:
label: Is there an existing issue for this?
- description: Please search [our issues](https://github.com/unum-cloud/usearch/issues) to see if this bug already exists.
+ description: Please search [our issues](https://github.com/unum-cloud/USearch/issues) to see if this bug already exists.
options:
- label: I have searched the existing issues
required: true
@@ -92,7 +92,7 @@ body:
id: terms
attributes:
label: Code of Conduct
- description: By submitting this issue, you agree to follow our [Code of Conduct](https://github.com/unum-cloud/usearch/blob/main/CODE_OF_CONDUCT.md)
+ description: By submitting this issue, you agree to follow our [Code of Conduct](https://github.com/unum-cloud/USearch/blob/main/CODE_OF_CONDUCT.md)
options:
- label: I agree to follow this project's Code of Conduct
required: true
diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml
index 3634918f1..5660ffc32 100644
--- a/.github/ISSUE_TEMPLATE/feature_request.yml
+++ b/.github/ISSUE_TEMPLATE/feature_request.yml
@@ -9,7 +9,7 @@ body:
attributes:
value: |
Thanks for taking the time to fill out this feature request!
- We have already scheduled a few features for our [next milestone](https://github.com/unum-cloud/usearch/milestones).
+ We have already scheduled a few features for our [next milestone](https://github.com/unum-cloud/USearch/milestones).
Still, if it is something we have yet to think about, we will happily extend our [roadmap](https://github.com/orgs/unum-cloud/projects/2).
It is also worth noting that choosing between speed and functionality, we generally prefer the former.
So in some cases, it makes more sense to build something on top of USearch rather than patch it ๐ค
@@ -50,7 +50,7 @@ body:
id: duplicate
attributes:
label: Is there an existing issue for this?
- description: Please search [our issues](https://github.com/unum-cloud/usearch/issues) to see if this bug already exists.
+ description: Please search [our issues](https://github.com/unum-cloud/USearch/issues) to see if this bug already exists.
options:
- label: I have searched the existing issues
required: true
@@ -58,7 +58,7 @@ body:
id: terms
attributes:
label: Code of Conduct
- description: By submitting this issue, you agree to follow our [Code of Conduct](https://github.com/unum-cloud/usearch/blob/main/CODE_OF_CONDUCT.md)
+ description: By submitting this issue, you agree to follow our [Code of Conduct](https://github.com/unum-cloud/USearch/blob/main/CODE_OF_CONDUCT.md)
options:
- label: I agree to follow this project's Code of Conduct
required: true
diff --git a/.github/workflows/prerelease.yml b/.github/workflows/prerelease.yml
index 4c92c11d0..4bd86da8c 100644
--- a/.github/workflows/prerelease.yml
+++ b/.github/workflows/prerelease.yml
@@ -9,11 +9,12 @@ on:
env:
GH_TOKEN: ${{ secrets.SEMANTIC_RELEASE_TOKEN }}
PYTHONUTF8: 1
+ PYTHONFAULTHANDLER: 1
PYTHON_VERSION: 3.11
DOTNET_VERSION: 8.0.x
NODE_VERSION: 20
JAVA_VERSION: 21
- GO_VERSION: "^1.21.0"
+ GO_VERSION: "^1.22.0"
ANDROID_NDK_VERSION: 26.3.11579264
ANDROID_SDK_VERSION: 21
@@ -38,10 +39,8 @@ jobs:
version-file: "VERSION"
update-version-in: |
package.json:"version": "(\d+\.\d+\.\d+)"
- package-lock.json:"usearch",\n\s+"version": "(\d+\.\d+\.\d+)"
CITATION.cff:^version: (\d+\.\d+\.\d+)
Cargo.toml:^version = "(\d+\.\d+\.\d+)"
- Cargo.lock:name = "usearch"\nversion = "(\d+\.\d+\.\d+)"
wasmer.toml:^version = "(\d+\.\d+\.\d+)"
conanfile.py:version = "(\d+\.\d+\.\d+)"
java/README.md:usearchVersion = '(\d+\.\d+\.\d+)'
@@ -74,8 +73,8 @@ jobs:
- name: Build C/C++
run: |
sudo apt update
- sudo apt install -y cmake build-essential libjemalloc-dev libomp-dev gcc-12 g++-12
- cmake -B build_artifacts -D CMAKE_BUILD_TYPE=RelWithDebInfo -D USEARCH_BUILD_TEST_CPP=1 -D USEARCH_BUILD_TEST_C=1 -D USEARCH_BUILD_LIB_C=1 -D USEARCH_BUILD_SQLITE=1 -D USEARCH_USE_OPENMP=1 -D USEARCH_USE_SIMSIMD=1 -D USEARCH_USE_JEMALLOC=1
+ sudo apt install -y cmake build-essential libjemalloc-dev libomp-dev
+ cmake -B build_artifacts -D CMAKE_BUILD_TYPE=RelWithDebInfo -D USEARCH_BUILD_TEST_CPP=1 -D USEARCH_BUILD_TEST_C=1 -D USEARCH_BUILD_LIB_C=1 -D USEARCH_BUILD_SQLITE=1 -D USEARCH_USE_OPENMP=1 -D USEARCH_USE_NUMKONG=1 -D USEARCH_USE_JEMALLOC=1
cmake --build build_artifacts --config RelWithDebInfo
- name: Test C++
run: build_artifacts/test_cpp
@@ -91,7 +90,7 @@ jobs:
run: |
python -m pip install --upgrade pip
pip install pytest pytest-repeat numpy numba cppyy
- pip install --upgrade git+https://github.com/Maratyszcza/PeachPy
+
python -m pip install .
- name: Test Python
run: pytest
@@ -102,7 +101,7 @@ jobs:
with:
node-version: ${{ env.NODE_VERSION }}
- name: Install dependencies
- run: npm ci --skip-scripts
+ run: npm install --ignore-scripts
- name: Build JavaScript
run: npm run build-js
- name: Test JavaScript
@@ -113,7 +112,10 @@ jobs:
run: |
rustup update stable
rustup default stable
+ rustup component add clippy
rustc -vV
+ - name: Lint Rust
+ run: cargo clippy --all-targets -- -D warnings
- name: Build Rust
run: cargo build
- name: Test Rust
@@ -155,11 +157,11 @@ jobs:
working-directory: ${{ github.workspace }}/golang/
test_ubuntu_clang:
- name: Ubuntu (Clang 18)
+ name: Ubuntu (Clang 20)
runs-on: ubuntu-24.04
env:
- CC: clang
- CXX: clang++
+ CC: clang-20
+ CXX: clang++-20
steps:
- name: Checkout
@@ -168,12 +170,11 @@ jobs:
run: git submodule update --init --recursive
# C/C++
- # Clang 16 isn't available from default repos on Ubuntu 22.04, so we have to install it manually
- name: Build C/C++
run: |
sudo apt update
- sudo apt install -y cmake build-essential libjemalloc-dev clang
- cmake -B build_artifacts -D CMAKE_BUILD_TYPE=RelWithDebInfo -D USEARCH_BUILD_TEST_CPP=1 -D USEARCH_BUILD_TEST_C=1 -D USEARCH_BUILD_LIB_C=1 -D USEARCH_BUILD_SQLITE=1 -D USEARCH_USE_OPENMP=0 -D USEARCH_USE_SIMSIMD=1 -D USEARCH_USE_JEMALLOC=1
+ sudo apt install -y cmake build-essential libjemalloc-dev clang-20
+ cmake -B build_artifacts -D CMAKE_BUILD_TYPE=RelWithDebInfo -D USEARCH_BUILD_TEST_CPP=1 -D USEARCH_BUILD_TEST_C=1 -D USEARCH_BUILD_LIB_C=1 -D USEARCH_BUILD_SQLITE=1 -D USEARCH_USE_OPENMP=0 -D USEARCH_USE_NUMKONG=1 -D USEARCH_USE_JEMALLOC=1
cmake --build build_artifacts --config RelWithDebInfo
- name: Test C++
run: build_artifacts/test_cpp
@@ -186,7 +187,7 @@ jobs:
with:
node-version: ${{ env.NODE_VERSION }}
- name: Install dependencies
- run: npm ci --skip-scripts
+ run: npm install --ignore-scripts
- name: Build JavaScript
run: npm run build-js
- name: Test JavaScript
@@ -201,7 +202,7 @@ jobs:
run: |
python -m pip install --upgrade pip
pip install pytest pytest-repeat numpy numba cppyy
- pip install --upgrade git+https://github.com/Maratyszcza/PeachPy
+
python -m pip install .
env:
CXX: clang++
@@ -234,7 +235,7 @@ jobs:
working-directory: ${{ github.workspace }}/csharp
test_macos:
- name: MacOS
+ name: macOS
runs-on: macos-15
steps:
@@ -261,7 +262,7 @@ jobs:
with:
node-version: ${{ env.NODE_VERSION }}
- name: Install dependencies
- run: npm ci --skip-scripts
+ run: npm install --ignore-scripts
- name: Build JavaScript
run: npm run build-js
- name: Test JavaScript
@@ -314,7 +315,7 @@ jobs:
working-directory: ${{ github.workspace }}/csharp
test_windows_x86:
- name: Windows (x86)
+ name: Windows (X86)
runs-on: windows-2022
steps:
- name: Checkout
@@ -352,7 +353,7 @@ jobs:
with:
node-version: ${{ env.NODE_VERSION }}
- name: Install dependencies
- run: npm ci --skip-scripts
+ run: npm install --ignore-scripts
- name: Build JavaScript
run: npm run build-js
- name: Test JavaScript
@@ -383,7 +384,7 @@ jobs:
working-directory: ${{ github.workspace }}/csharp
test_windows_arm:
- name: Windows (Arm)
+ name: Windows (ARM)
runs-on: windows-2022
steps:
- name: Checkout
@@ -407,8 +408,8 @@ jobs:
Write-Host "ARM64 DLL built successfully"
build_docker:
- name: Docker
- runs-on: ubuntu-22.04
+ name: Build Docker
+ runs-on: ubuntu-24.04
steps:
- name: Checkout
uses: actions/checkout@v6
@@ -421,44 +422,110 @@ jobs:
load: true
push: false
- build_wheels:
- name: Build Python Wheels
- runs-on: ${{ matrix.os }}
- needs:
- [
- test_ubuntu_gcc,
- test_ubuntu_clang,
- test_macos,
- test_windows_x86,
- test_windows_arm,
- ]
+ build_wheels_linux:
+ name: Build Python Wheels (Linux)
+ runs-on: ubuntu-24.04
+ needs: [test_ubuntu_gcc, test_ubuntu_clang]
strategy:
matrix:
- os: [ubuntu-24.04, macos-14, windows-2022]
- python-version: ["38", "39", "310", "311", "312", "313", "313t", "314", "314t"]
+ python-version: ["310", "311", "312", "313", "313t", "314", "314t"]
steps:
- name: Checkout
uses: actions/checkout@v6
- name: Set up Python
uses: actions/setup-python@v6
with:
- python-version: 3.x
-
- # We only need QEMU for Linux builds
+ python-version: "3.13"
- name: Setup QEMU
- if: matrix.os == 'ubuntu-24.04'
uses: docker/setup-qemu-action@v3
- name: Install cibuildwheel
- run: python -m pip install cibuildwheel
+ uses: nick-fields/retry@v4
+ with:
+ max_attempts: 3
+ retry_wait_seconds: 10
+ timeout_minutes: 180
+ command: python -m pip install cibuildwheel
+ - name: Build wheels
+ uses: nick-fields/retry@v4
+ with:
+ max_attempts: 2
+ retry_wait_seconds: 30
+ timeout_minutes: 180
+ command: cibuildwheel --output-dir wheelhouse
+ env:
+ CIBW_BUILD: cp${{ matrix.python-version }}-*
+ CIBW_ENABLE: cpython-freethreading
+ CIBW_PLATFORM: linux
+
+ build_wheels_macos:
+ name: Build Python Wheels (macOS)
+ runs-on: macos-14
+ needs: [test_macos]
+ strategy:
+ matrix:
+ python-version: ["310", "311", "312", "313", "313t", "314", "314t"]
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v6
+ - name: Set up Python
+ uses: actions/setup-python@v6
+ with:
+ python-version: "3.13"
+ - name: Install cibuildwheel
+ uses: nick-fields/retry@v4
+ with:
+ max_attempts: 3
+ retry_wait_seconds: 10
+ timeout_minutes: 180
+ command: python -m pip install cibuildwheel
- name: Build wheels
- run: cibuildwheel --output-dir wheelhouse
+ uses: nick-fields/retry@v4
+ with:
+ max_attempts: 2
+ retry_wait_seconds: 30
+ timeout_minutes: 180
+ command: cibuildwheel --output-dir wheelhouse
env:
CIBW_BUILD: cp${{ matrix.python-version }}-*
- CIBW_ENABLE: cpython-freethreading # No-GIL 3.13t builds
+ CIBW_ENABLE: cpython-freethreading
+ CIBW_PLATFORM: macos
+
+ build_wheels_windows:
+ name: Build Python Wheels (Windows)
+ runs-on: windows-2022
+ needs: [test_windows_x86]
+ strategy:
+ matrix:
+ python-version: ["310", "311", "312", "313", "313t", "314", "314t"]
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v6
+ - name: Set up Python
+ uses: actions/setup-python@v6
+ with:
+ python-version: "3.13"
+ - name: Install cibuildwheel
+ uses: nick-fields/retry@v4
+ with:
+ max_attempts: 3
+ retry_wait_seconds: 10
+ timeout_minutes: 180
+ command: python -m pip install cibuildwheel
+ - name: Build wheels
+ uses: nick-fields/retry@v4
+ with:
+ max_attempts: 2
+ retry_wait_seconds: 30
+ timeout_minutes: 180
+ command: cibuildwheel --output-dir wheelhouse
+ env:
+ CIBW_BUILD: cp${{ matrix.python-version }}-*
+ CIBW_ENABLE: cpython-freethreading
+ CIBW_PLATFORM: windows
test_ubuntu_cross_compilation:
- name: Cross Compilation
- runs-on: ubuntu-22.04
+ name: Cross-Compile Linux
+ runs-on: ubuntu-24.04
env:
CC: clang
CXX: clang++
@@ -512,7 +579,7 @@ jobs:
test_android:
name: Android
- runs-on: ubuntu-22.04
+ runs-on: ubuntu-24.04
strategy:
fail-fast: false
matrix:
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 592d6b263..f7e44088b 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -11,7 +11,7 @@ env:
DOTNET_VERSION: 8.0.x
NODE_VERSION: 20
JAVA_VERSION: 21
- GO_VERSION: "^1.21.0"
+ GO_VERSION: "^1.22.0"
ANDROID_NDK_VERSION: 26.3.11579264
ANDROID_SDK_VERSION: 21
EMSCRIPTEN_VERSION: 3.1.47
@@ -27,7 +27,7 @@ permissions:
jobs:
versioning:
name: Update Version
- runs-on: ubuntu-latest
+ runs-on: ubuntu-24.04
steps:
- name: Checkout
uses: actions/checkout@v6
@@ -41,10 +41,8 @@ jobs:
version-file: "VERSION"
update-version-in: |
package.json:"version": "(\d+\.\d+\.\d+)"
- package-lock.json:"usearch",\n\s+"version": "(\d+\.\d+\.\d+)"
CITATION.cff:^version: (\d+\.\d+\.\d+)
Cargo.toml:^version = "(\d+\.\d+\.\d+)"
- Cargo.lock:name = "usearch"\nversion = "(\d+\.\d+\.\d+)"
wasmer.toml:^version = "(\d+\.\d+\.\d+)"
conanfile.py:version = "(\d+\.\d+\.\d+)"
java/README.md:usearchVersion = '(\d+\.\d+\.\d+)'
@@ -64,9 +62,9 @@ jobs:
github-token: ${{ secrets.SEMANTIC_RELEASE_TOKEN }}
rebase:
- name: Rebase Dev. Branch
+ name: Rebase Dev Branch
needs: versioning
- runs-on: ubuntu-22.04
+ runs-on: ubuntu-24.04
steps:
- name: Checkout the latest code
uses: actions/checkout@v6
@@ -88,8 +86,8 @@ jobs:
force: True
create_linux_deb_package:
- name: Create Debian Package
- runs-on: ubuntu-22.04
+ name: Build Linux Package
+ runs-on: ubuntu-24.04
needs: versioning
env:
CC: clang
@@ -171,7 +169,7 @@ jobs:
update_latest_release: true
create_windows_dll_library:
- name: Create Dll Library for Windows
+ name: Build Windows Library
runs-on: windows-2022
needs: versioning
strategy:
@@ -218,14 +216,20 @@ jobs:
run: |
cmake -DCMAKE_BUILD_TYPE=Release -DUSEARCH_BUILD_LIB_C=1 -DUSEARCH_BUILD_SQLITE=0 -DUSEARCH_BUILD_TEST_CPP=0 -DUSEARCH_BUILD_BENCH_CPP=0 -B ./build_release
cmake --build ./build_release --config Release
- tar -cvf usearch_windows_${{ matrix.arch }}_${{ steps.set_version.outputs.version }}.tar .\build_release\libusearch_c.dll .\c\usearch.h
+ mkdir pkg
+ copy .\build_release\libusearch_c.dll pkg\
+ copy .\c\usearch.h pkg\
+ tar -cvf usearch_windows_${{ matrix.arch }}_${{ steps.set_version.outputs.version }}.tar -C pkg libusearch_c.dll usearch.h
- name: Build library (MSVC ARM64)
if: matrix.arch == 'arm64'
run: |
cmake -G "Visual Studio 17 2022" -A ARM64 -DCMAKE_BUILD_TYPE=Release -DUSEARCH_BUILD_LIB_C=1 -DUSEARCH_BUILD_SQLITE=0 -DUSEARCH_BUILD_TEST_CPP=0 -DUSEARCH_BUILD_BENCH_CPP=0 -B ./build_release
cmake --build ./build_release --config Release
- tar -cvf usearch_windows_${{ matrix.arch }}_${{ steps.set_version.outputs.version }}.tar .\build_release\libusearch_c.dll .\c\usearch.h
+ mkdir pkg
+ copy .\build_release\libusearch_c.dll pkg\
+ copy .\c\usearch.h pkg\
+ tar -cvf usearch_windows_${{ matrix.arch }}_${{ steps.set_version.outputs.version }}.tar -C pkg libusearch_c.dll usearch.h
- name: Upload native library for Java
uses: actions/upload-artifact@v5
@@ -243,7 +247,7 @@ jobs:
update_latest_release: true
create_macos_dylib_library:
- name: Create Library for MacOS
+ name: Build macOS Library
runs-on: macos-15
needs: versioning
strategy:
@@ -279,8 +283,8 @@ jobs:
-DUSEARCH_BUILD_BENCH_CPP=0 \
-DCMAKE_OSX_ARCHITECTURES=${{ matrix.arch }} \
-B ./build_release
- cmake --build ./build_release --config Release
- zip -r usearch_macos_${{ matrix.arch }}_${{ steps.set_version.outputs.version }}.zip build_release/libusearch_c.dylib c/usearch.h
+ cmake --build ./build_release --config Release
+ zip -j usearch_macos_${{ matrix.arch }}_${{ steps.set_version.outputs.version }}.zip build_release/libusearch_c.dylib c/usearch.h
mv build_release/libusearch_sqlite.dylib usearch_sqlite_macos_${{ matrix.arch }}_${{ steps.set_version.outputs.version }}.dylib
- name: Upload native library for Java
@@ -307,8 +311,8 @@ jobs:
update_latest_release: true
create_android_library:
- name: Create Android Libraries
- runs-on: ubuntu-22.04
+ name: Build Android Library
+ runs-on: ubuntu-24.04
needs: versioning
strategy:
fail-fast: false
@@ -356,7 +360,7 @@ jobs:
- name: Package library
run: |
- zip -r usearch_android_${{ matrix.arch }}_${{ steps.set_version.outputs.version }}.zip build_artifacts/libusearch_c.so c/usearch.h
+ zip -j usearch_android_${{ matrix.arch }}_${{ steps.set_version.outputs.version }}.zip build_artifacts/libusearch_c.so c/usearch.h
- name: Upload native library for Java
uses: actions/upload-artifact@v5
@@ -417,23 +421,22 @@ jobs:
-DUSEARCH_BUILD_TEST_CPP=0 \
-DUSEARCH_BUILD_BENCH_CPP=0 \
-DUSEARCH_BUILD_LIB_C=0 \
- -DUSEARCH_USE_FP16LIB=1 \
- -DUSEARCH_USE_SIMSIMD=1 \
- -DSIMSIMD_TARGET_HASWELL=1 \
- -DSIMSIMD_TARGET_SKYLAKE=1 \
- -DSIMSIMD_TARGET_ICE=1 \
- -DSIMSIMD_TARGET_GENOA=1 \
- -DSIMSIMD_TARGET_SAPPHIRE=1 \
- -DSIMSIMD_TARGET_NEON=1 \
- -DSIMSIMD_TARGET_NEON_BF16=1 \
- -DSIMSIMD_TARGET_NEON_F16=1 \
- -DSIMSIMD_TARGET_NEON_I8=1 \
- -DSIMSIMD_TARGET_SVE=1 \
- -DSIMSIMD_TARGET_SVE_BF16=1 \
- -DSIMSIMD_TARGET_SVE_F16=1 \
- -DSIMSIMD_TARGET_SVE_I8=1 \
- -DSIMSIMD_TARGET_SVE2=1 \
- -DSIMSIMD_DYNAMIC_DISPATCH=1
+ -DUSEARCH_USE_NUMKONG=1 \
+ -DNK_TARGET_HASWELL=1 \
+ -DNK_TARGET_SKYLAKE=1 \
+ -DNK_TARGET_ICELAKE=1 \
+ -DNK_TARGET_GENOA=1 \
+ -DNK_TARGET_SAPPHIRE=1 \
+ -DNK_TARGET_NEON=1 \
+ -DNK_TARGET_NEONBFDOT=1 \
+ -DNK_TARGET_NEONHALF=1 \
+ -DNK_TARGET_NEONSDOT=1 \
+ -DNK_TARGET_SVE=1 \
+ -DNK_TARGET_SVEBFDOT=1 \
+ -DNK_TARGET_SVEHALF=1 \
+ -DNK_TARGET_SVESDOT=1 \
+ -DNK_TARGET_SVE2=1 \
+ -DNK_DYNAMIC_DISPATCH=1
cmake --build build_artifacts --config Release -j
@@ -454,16 +457,16 @@ jobs:
include:
- arch: x64
cmake_arch: x64
- simsimd_target_x86: 1
- simsimd_target_arm: 0
+ nk_target_x86: 1
+ nk_target_arm: 0
- arch: x86
cmake_arch: Win32
- simsimd_target_x86: 1
- simsimd_target_arm: 0
+ nk_target_x86: 1
+ nk_target_arm: 0
- arch: arm64
cmake_arch: ARM64
- simsimd_target_x86: 0
- simsimd_target_arm: 1
+ nk_target_x86: 0
+ nk_target_arm: 1
steps:
- name: Check out refreshed version
@@ -487,15 +490,14 @@ jobs:
-DUSEARCH_BUILD_TEST_CPP=0 `
-DUSEARCH_BUILD_BENCH_CPP=0 `
-DUSEARCH_BUILD_LIB_C=0 `
- -DUSEARCH_USE_FP16LIB=1 `
- -DUSEARCH_USE_SIMSIMD=1 `
- -DSIMSIMD_TARGET_NEON=${{ matrix.simsimd_target_arm }} `
- -DSIMSIMD_TARGET_HASWELL=${{ matrix.simsimd_target_x86 }} `
- -DSIMSIMD_TARGET_SKYLAKE=${{ matrix.simsimd_target_x86 }} `
- -DSIMSIMD_TARGET_ICE=${{ matrix.simsimd_target_x86 }} `
- -DSIMSIMD_TARGET_GENOA=0 `
- -DSIMSIMD_TARGET_SAPPHIRE=0 `
- -DSIMSIMD_DYNAMIC_DISPATCH=1
+ -DUSEARCH_USE_NUMKONG=1 `
+ -DNK_TARGET_NEON=${{ matrix.nk_target_arm }} `
+ -DNK_TARGET_HASWELL=${{ matrix.nk_target_x86 }} `
+ -DNK_TARGET_SKYLAKE=${{ matrix.nk_target_x86 }} `
+ -DNK_TARGET_ICELAKE=${{ matrix.nk_target_x86 }} `
+ -DNK_TARGET_GENOA=0 `
+ -DNK_TARGET_SAPPHIRE=0 `
+ -DNK_DYNAMIC_DISPATCH=1
cmake --build build_artifacts --config Release
@@ -542,14 +544,13 @@ jobs:
-DUSEARCH_BUILD_TEST_CPP=0 \
-DUSEARCH_BUILD_BENCH_CPP=0 \
-DUSEARCH_BUILD_LIB_C=0 \
- -DUSEARCH_USE_FP16LIB=1 \
- -DUSEARCH_USE_SIMSIMD=1 \
- -DSIMSIMD_TARGET_HASWELL=1 \
- -DSIMSIMD_TARGET_NEON=1 \
- -DSIMSIMD_TARGET_NEON_BF16=1 \
- -DSIMSIMD_TARGET_NEON_F16=1 \
- -DSIMSIMD_TARGET_NEON_I8=1 \
- -DSIMSIMD_DYNAMIC_DISPATCH=1
+ -DUSEARCH_USE_NUMKONG=1 \
+ -DNK_TARGET_HASWELL=1 \
+ -DNK_TARGET_NEON=1 \
+ -DNK_TARGET_NEONBFDOT=1 \
+ -DNK_TARGET_NEONHALF=1 \
+ -DNK_TARGET_NEONSDOT=1 \
+ -DNK_DYNAMIC_DISPATCH=1
cmake --build build_artifacts --config Release
@@ -605,13 +606,12 @@ jobs:
-DUSEARCH_BUILD_TEST_CPP=0 \
-DUSEARCH_BUILD_BENCH_CPP=0 \
-DUSEARCH_BUILD_LIB_C=0 \
- -DUSEARCH_USE_FP16LIB=1 \
- -DUSEARCH_USE_SIMSIMD=1 \
- -DSIMSIMD_TARGET_NEON=1 \
- -DSIMSIMD_TARGET_NEON_BF16=1 \
- -DSIMSIMD_TARGET_NEON_F16=1 \
- -DSIMSIMD_TARGET_NEON_I8=1 \
- -DSIMSIMD_DYNAMIC_DISPATCH=1
+ -DUSEARCH_USE_NUMKONG=1 \
+ -DNK_TARGET_NEON=1 \
+ -DNK_TARGET_NEONBFDOT=1 \
+ -DNK_TARGET_NEONHALF=1 \
+ -DNK_TARGET_NEONSDOT=1 \
+ -DNK_DYNAMIC_DISPATCH=1
cmake --build build_artifacts --config Release
@@ -623,8 +623,8 @@ jobs:
retention-days: 1
create_wasm_library:
- name: Create WASM Libraries
- runs-on: ubuntu-22.04
+ name: Build WASM Library
+ runs-on: ubuntu-24.04
needs: versioning
steps:
- name: Check out refreshed version
@@ -655,14 +655,15 @@ jobs:
-DUSEARCH_BUILD_BENCH_CPP=OFF \
-DUSEARCH_BUILD_SQLITE=OFF \
-DUSEARCH_USE_OPENMP=OFF \
- -DUSEARCH_USE_SIMSIMD=ON \
+ -DUSEARCH_USE_NUMKONG=ON \
-DUSEARCH_USE_JEMALLOC=OFF
emmake cmake --build build_wasm --config Release
cd build_wasm
- tar -czf ../usearch_wasm_${{ steps.set_version.outputs.version }}.tar.gz libusearch_c.* ../c/usearch.h
- zip -r ../usearch_wasm_${{ steps.set_version.outputs.version }}.zip libusearch_c.* ../c/usearch.h
+ cp ../c/usearch.h .
+ tar -czf ../usearch_wasm_${{ steps.set_version.outputs.version }}.tar.gz libusearch_c.* usearch.h
+ zip -j ../usearch_wasm_${{ steps.set_version.outputs.version }}.zip libusearch_c.* usearch.h
- name: Upload archives
uses: xresloader/upload-to-github-release@v1
@@ -674,7 +675,7 @@ jobs:
publish_checksums:
name: Compute Assets Checksums
- runs-on: ubuntu-22.04
+ runs-on: ubuntu-24.04
needs:
[
versioning,
@@ -734,7 +735,8 @@ jobs:
strategy:
matrix:
os: [ubuntu-24.04, macos-14, windows-2022]
- python-version: ["38", "39", "310", "311", "312", "313", "313t", "314", "314t"]
+ python-version:
+ ["310", "311", "312", "313", "313t", "314", "314t"]
steps:
- name: Check out refreshed version
uses: actions/checkout@v6
@@ -768,7 +770,7 @@ jobs:
publish_python:
name: Publish Python
needs: build_wheels
- runs-on: ubuntu-22.04
+ runs-on: ubuntu-24.04
environment:
name: pypi
url: https://pypi.org/p/usearch
@@ -834,7 +836,7 @@ jobs:
# NPM installation fails spuriously all the time
- name: Install Dependencies
run: |
- npm ci --ignore-scripts || npm ci --ignore-scripts || npm ci --ignore-scripts
+ npm install --ignore-scripts
- run: npm run prebuild-single
if: matrix.os != 'macos-14'
- run: npm run prebuild-darwin-x64+arm64
@@ -866,7 +868,13 @@ jobs:
publish_javascript:
name: Publish JavaScript
needs: build_javascript
- runs-on: ubuntu-22.04
+ runs-on: ubuntu-24.04
+ environment:
+ name: npm-usearch
+ url: https://www.npmjs.com/package/usearch
+ permissions:
+ contents: read
+ id-token: write
steps:
- name: Check out refreshed version
@@ -879,7 +887,8 @@ jobs:
- name: Set up Node.js
uses: actions/setup-node@v6
with:
- node-version: ${{ env.NODE_VERSION }}
+ node-version: "24"
+ registry-url: "https://registry.npmjs.org"
- name: Download prebuilds
uses: actions/download-artifact@v6
@@ -894,7 +903,7 @@ jobs:
# NPM installation fails spuriously all the time
- name: Install Dependencies
run: |
- npm ci --ignore-scripts || npm ci --ignore-scripts || npm ci --ignore-scripts
+ npm install --ignore-scripts
- name: Build the JS from TS
run: npm run build-js
@@ -906,10 +915,7 @@ jobs:
run: npm publish --dry-run
- name: Publish
- uses: JS-DevTools/npm-publish@v3
- with:
- token: ${{ secrets.NPM_TOKEN }}
- access: public
+ run: npm publish --provenance --access public
publish_rust:
name: Publish Rust
@@ -924,7 +930,7 @@ jobs:
run: git submodule update --init --recursive
- name: Hide submodule Crates
run: |
- mv simsimd/Cargo.toml simsimd/Cargo.toml.bak
+ mv numkong/Cargo.toml numkong/Cargo.toml.bak
mv stringzilla/Cargo.toml stringzilla/Cargo.toml.bak
- name: Set up Rust
uses: dtolnay/rust-toolchain@stable
@@ -987,7 +993,7 @@ jobs:
cp usearch-jni-linux-amd64/libusearch_jni.so build/libs/usearch/shared/linux-amd64/libusearch_jni.so || true
cp usearch-jni-linux-arm64/libusearch_jni.so build/libs/usearch/shared/linux-arm64/libusearch_jni.so || true
- # Windows
+ # Windows
cp usearch-jni-windows-x64/libusearch_jni.dll build/libs/usearch/shared/windows-amd64/libusearch_jni.dll || true
cp usearch-jni-windows-x86/libusearch_jni.dll build/libs/usearch/shared/windows-x86/libusearch_jni.dll || true
cp usearch-jni-windows-arm64/libusearch_jni.dll build/libs/usearch/shared/windows-arm64/libusearch_jni.dll || true
@@ -1042,7 +1048,7 @@ jobs:
update_latest_release: true
publish_swift:
- name: Publish ObjC & Swift
+ name: Publish Objective-C & Swift
needs: versioning
runs-on: macos-15
steps:
@@ -1062,7 +1068,7 @@ jobs:
publish_docker:
name: Publish Docker Image
needs: versioning
- runs-on: ubuntu-22.04
+ runs-on: ubuntu-24.04
permissions:
contents: read
packages: write
@@ -1107,7 +1113,7 @@ jobs:
strategy:
fail-fast: false
matrix:
- os: [ubuntu-22.04, macos-15, windows-2022]
+ os: [ubuntu-24.04, macos-15, windows-2022]
runs-on: ${{ matrix.os }}
defaults:
run:
@@ -1122,11 +1128,11 @@ jobs:
run: git submodule update --init --recursive
- name: Build C library for Linux
- if: matrix.os == 'ubuntu-22.04'
+ if: matrix.os == 'ubuntu-24.04'
run: |
sudo apt update
- sudo apt install -y cmake build-essential libjemalloc-dev libomp-dev gcc-12 g++-12
- cmake -B build_artifacts -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=gcc-12 -DCMAKE_CXX_COMPILER=g++-12 -DUSEARCH_BUILD_TEST_CPP=0 -DUSEARCH_BUILD_TEST_C=0 -DUSEARCH_BUILD_LIB_C=1 -DUSEARCH_USE_OPENMP=1 -DUSEARCH_USE_SIMSIMD=1 -DUSEARCH_USE_JEMALLOC=1
+ sudo apt install -y cmake build-essential libjemalloc-dev libomp-dev
+ cmake -B build_artifacts -DCMAKE_BUILD_TYPE=Release -DUSEARCH_BUILD_TEST_CPP=0 -DUSEARCH_BUILD_TEST_C=0 -DUSEARCH_BUILD_LIB_C=1 -DUSEARCH_USE_OPENMP=1 -DUSEARCH_USE_NUMKONG=1 -DUSEARCH_USE_JEMALLOC=1
cmake --build build_artifacts --config Release
mkdir -p "${{ github.workspace }}/csharp/lib/runtimes/linux-x64/native"
cp "${{ github.workspace }}/build_artifacts/libusearch_c.so" "${{ github.workspace }}/csharp/lib/runtimes/linux-x64/native"
@@ -1136,7 +1142,7 @@ jobs:
run: |
brew update
brew reinstall cmake
- cmake -B build_artifacts -DCMAKE_BUILD_TYPE=Release -DUSEARCH_BUILD_TEST_CPP=0 -DUSEARCH_BUILD_TEST_C=0 -DUSEARCH_BUILD_LIB_C=1 -DUSEARCH_USE_OPENMP=0 -DUSEARCH_USE_SIMSIMD=1 -DUSEARCH_USE_JEMALLOC=0
+ cmake -B build_artifacts -DCMAKE_BUILD_TYPE=Release -DUSEARCH_BUILD_TEST_CPP=0 -DUSEARCH_BUILD_TEST_C=0 -DUSEARCH_BUILD_LIB_C=1 -DUSEARCH_USE_OPENMP=0 -DUSEARCH_USE_NUMKONG=1 -DUSEARCH_USE_JEMALLOC=0
cmake --build build_artifacts --config Release
mkdir -p "${{ github.workspace }}/csharp/lib/runtimes/osx-arm64/native"
cp "${{ github.workspace }}/build_artifacts/libusearch_c.dylib" "${{ github.workspace }}/csharp/lib/runtimes/osx-arm64/native"
@@ -1145,7 +1151,7 @@ jobs:
if: matrix.os == 'windows-2022'
run: |
choco install cmake
- cmake -B build_artifacts -DCMAKE_BUILD_TYPE=Release -DUSEARCH_BUILD_TEST_CPP=0 -DUSEARCH_BUILD_TEST_C=0 -DUSEARCH_BUILD_LIB_C=1 -DUSEARCH_USE_OPENMP=0 -DUSEARCH_USE_SIMSIMD=0 -DUSEARCH_USE_JEMALLOC=0
+ cmake -B build_artifacts -DCMAKE_BUILD_TYPE=Release -DUSEARCH_BUILD_TEST_CPP=0 -DUSEARCH_BUILD_TEST_C=0 -DUSEARCH_BUILD_LIB_C=1 -DUSEARCH_USE_OPENMP=0 -DUSEARCH_USE_NUMKONG=0 -DUSEARCH_USE_JEMALLOC=0
cmake --build build_artifacts --config Release
mkdir -p "${{ github.workspace }}\csharp\lib\runtimes\win-x64\native"
cp "${{ github.workspace }}\build_artifacts\libusearch_c.dll" "${{ github.workspace }}\csharp\lib\runtimes\win-x64\native"
@@ -1158,7 +1164,7 @@ jobs:
publish_csharp:
name: Publish C#
- runs-on: ubuntu-22.04
+ runs-on: ubuntu-24.04
needs: [build_csharp]
env:
SOLUTION: ${{ github.workspace }}/csharp
@@ -1203,7 +1209,7 @@ jobs:
build_docs:
name: Build Docs
- runs-on: ubuntu-22.04
+ runs-on: ubuntu-24.04
if: ${{ always() }}
needs:
[
@@ -1224,9 +1230,9 @@ jobs:
ref: "main"
- name: Install dependencies
run: |
- sudo apt update &&
- sudo apt install -y doxygen graphviz dia git &&
- pip install -r docs/requirements.txt &&
+ sudo apt update &&
+ sudo apt install -y doxygen graphviz dia git &&
+ pip install -r docs/requirements.txt &&
npm install -g jsdoc typedoc
- name: Install USearch from PyPi
run: pip install usearch
@@ -1234,7 +1240,7 @@ jobs:
# NPM installation fails spuriously all the time
- name: Build CJS JS
run: |
- npm ci --ignore-scripts || npm ci --ignore-scripts || npm ci --ignore-scripts
+ npm install --ignore-scripts
npm run build-js
- name: Build documentation
run: cd docs && doxygen conf.dox && make html
@@ -1255,7 +1261,7 @@ jobs:
environment:
name: github-pages
url: ${{ steps.deployment.outputs.page_url }}
- runs-on: ubuntu-22.04
+ runs-on: ubuntu-24.04
if: ${{ always() }}
needs: build_docs
steps:
@@ -1278,7 +1284,7 @@ jobs:
deploy_docs_vercel:
name: Deploy Vercel
- runs-on: ubuntu-22.04
+ runs-on: ubuntu-24.04
if: ${{ always() }}
needs: build_docs
steps:
diff --git a/.gitignore b/.gitignore
index 95d1183ad..0251973ae 100644
--- a/.gitignore
+++ b/.gitignore
@@ -26,41 +26,40 @@ cmake_install.cmake
gmon.out
perf.data
-# Python wrappers
+# Python SDK
__pycache__
*.egg-info
dist/
wheelhouse/
-
-# Python Environments
venv/
+uv.lock
-# JS wrappers
+# JS SDK
node_modules/
javascript/dist/
prebuilds/
-
+package-lock.json
# Wasm
-
wasi-sdk-*
-# Rust builds
+# Rust SDK
Cargo.lock
target/
-# Java bindings
+# Java SDK
.gradle
*.class
-# ObjC and Swift
+# ObjC and Swift SDKs
.build
.swiftpm
+Package.resolved
-# Golang builds
+# Golang SDK
golang/usearch.h
-# C# builds
+# C# SDK
csharp/**/[Bb]in/
csharp/**/[Oo]bj/
csharp/lib
@@ -75,11 +74,11 @@ csharp/packages
*.o
*.obj
-# Precompiled Headers
+# Precompiled headers
*.gch
*.pch
-# Compiled Dynamic libraries
+# Compiled dynamic libraries
*.so
*.dylib
*.dll
diff --git a/.gitmodules b/.gitmodules
index 362bf981f..ed1f71fca 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,9 +1,8 @@
-[submodule "simsimd"]
- path = simsimd
- url = https://github.com/ashvardanian/simsimd
-[submodule "fp16"]
- path = fp16
- url = https://github.com/maratyszcza/fp16
+[submodule "numkong"]
+ path = numkong
+ url = https://github.com/ashvardanian/NumKong
+ branch = main-dev
[submodule "stringzilla"]
path = stringzilla
url = https://github.com/ashvardanian/stringzilla
+ branch = v3.10.10
diff --git a/.vscode/settings.json b/.vscode/settings.json
index 01bccf2e2..82b9422d9 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -1,4 +1,8 @@
{
+ "[python]": {
+ "editor.defaultFormatter": "charliermarsh.ruff",
+ "editor.formatOnSave": true
+ },
"[go]": {
"editor.defaultFormatter": "golang.go",
"editor.formatOnSave": true
@@ -90,6 +94,7 @@
"rtype",
"SIMD",
"simsimd",
+ "numkong",
"SLOC",
"Sonatype",
"sorensen",
@@ -116,6 +121,11 @@
"dotnet.defaultSolution": "csharp/Cloud.Unum.USearch.sln",
"editor.insertSpaces": true,
"editor.tabSize": 4,
+ "extensions.experimental.affinity": {
+ "ms-python.vscode-pylance": 3,
+ "ms-vscode.cpptools": 2,
+ "rust-lang.rust-analyzer": 1
+ },
"files.associations": {
"__availability": "cpp",
"__bit_reference": "cpp",
@@ -243,15 +253,33 @@
"xtree": "cpp",
"xutility": "cpp"
},
+ "files.watcherExclude": {
+ "**/build_*/**": true,
+ "**/datasets/**": true,
+ "**/node_modules/**": true,
+ "**/numkong/**": true,
+ "**/stringzilla/**": true,
+ "**/target/**": true
+ },
"java.configuration.updateBuildConfiguration": "automatic",
"java.format.enabled": false,
- "java.saveActions.organizeImports": false,
"prettier.singleQuote": true,
"prettier.useTabs": false,
"python.analysis.diagnosticSeverityOverrides": {
"reportMissingImports": "none"
},
- "python.testing.pytestArgs": ["python"],
+ "python.testing.pytestArgs": [
+ "python"
+ ],
"python.testing.pytestEnabled": true,
- "python.testing.unittestEnabled": false
-}
+ "python.testing.unittestEnabled": false,
+ "rust-analyzer.cargo.features": [],
+ "search.exclude": {
+ "**/build_*/**": true,
+ "**/datasets/**": true,
+ "**/node_modules/**": true,
+ "**/numkong/**": true,
+ "**/stringzilla/**": true,
+ "**/target/**": true
+ }
+}
\ No newline at end of file
diff --git a/.vscode/tasks.json b/.vscode/tasks.json
index da6a33097..c6c2bf84c 100644
--- a/.vscode/tasks.json
+++ b/.vscode/tasks.json
@@ -3,7 +3,7 @@
"tasks": [
{
"label": "Linux Build Debug",
- "command": "cmake -DCMAKE_CXX_COMPILER=gcc-12 -DCMAKE_CXX_COMPILER=g++-12 -DUSEARCH_BUILD_TEST_CPP=1 -DUSEARCH_BUILD_TEST_C=1 -DUSEARCH_BUILD_LIB_C=1 -DUSEARCH_USE_OPENMP=0 -DUSEARCH_USE_SIMSIMD=1 -DUSEARCH_USE_JEMALLOC=1 -DCMAKE_BUILD_TYPE=Debug -B ./build_debug && cmake --build ./build_debug --config Debug",
+ "command": "cmake -DCMAKE_CXX_COMPILER=gcc-12 -DCMAKE_CXX_COMPILER=g++-12 -DUSEARCH_BUILD_TEST_CPP=1 -DUSEARCH_BUILD_TEST_C=1 -DUSEARCH_BUILD_LIB_C=1 -DUSEARCH_USE_OPENMP=1 -DUSEARCH_USE_NUMKONG=1 -DUSEARCH_USE_JEMALLOC=1 -DCMAKE_BUILD_TYPE=Debug -B ./build_debug && cmake --build ./build_debug --config Debug",
"args": [],
"type": "shell",
"problemMatcher": [
@@ -12,7 +12,7 @@
},
{
"label": "Linux Build Release",
- "command": "cmake -DCMAKE_CXX_COMPILER=gcc-12 -DCMAKE_CXX_COMPILER=g++-12 -DUSEARCH_BUILD_TEST_CPP=1 -DUSEARCH_BUILD_TEST_C=1 -DUSEARCH_BUILD_LIB_C=1 -DUSEARCH_USE_OPENMP=0 -DUSEARCH_USE_SIMSIMD=1 -DUSEARCH_USE_JEMALLOC=1 -DCMAKE_BUILD_TYPE=Release -B ./build_release && cmake --build ./build_release --config RelWithDebInfo",
+ "command": "cmake -DCMAKE_CXX_COMPILER=gcc-12 -DCMAKE_CXX_COMPILER=g++-12 -DUSEARCH_BUILD_TEST_CPP=1 -DUSEARCH_BUILD_TEST_C=1 -DUSEARCH_BUILD_LIB_C=1 -DUSEARCH_USE_OPENMP=1 -DUSEARCH_USE_NUMKONG=1 -DUSEARCH_USE_JEMALLOC=1 -DCMAKE_BUILD_TYPE=Release -B ./build_release && cmake --build ./build_release --config RelWithDebInfo",
"args": [],
"type": "shell",
"problemMatcher": [
@@ -21,19 +21,19 @@
},
{
"label": "MacOS Build Debug",
- "command": "cmake -DCMAKE_C_COMPILER=/opt/homebrew/opt/llvm/bin/clang -DCMAKE_CXX_COMPILER=/opt/homebrew/opt/llvm/bin/clang++ -DUSEARCH_BUILD_TEST_CPP=1 -DUSEARCH_BUILD_TEST_C=1 -DUSEARCH_BUILD_LIB_C=1 -DUSEARCH_USE_OPENMP=1 -DUSEARCH_USE_SIMSIMD=1 -DUSEARCH_USE_JEMALLOC=0 -DCMAKE_BUILD_TYPE=Debug -B ./build_debug && cmake --build ./build_debug --config Debug",
+ "command": "cmake -DCMAKE_C_COMPILER=/opt/homebrew/opt/llvm/bin/clang -DCMAKE_CXX_COMPILER=/opt/homebrew/opt/llvm/bin/clang++ -DUSEARCH_BUILD_TEST_CPP=1 -DUSEARCH_BUILD_TEST_C=1 -DUSEARCH_BUILD_LIB_C=1 -DUSEARCH_USE_OPENMP=1 -DUSEARCH_USE_NUMKONG=1 -DUSEARCH_USE_JEMALLOC=0 -DCMAKE_BUILD_TYPE=Debug -B ./build_debug && cmake --build ./build_debug --config Debug",
"args": [],
- "type": "shell",
+ "type": "shell"
},
{
"label": "MacOS Build Release",
- "command": "cmake -DCMAKE_C_COMPILER=/opt/homebrew/opt/llvm/bin/clang -DCMAKE_CXX_COMPILER=/opt/homebrew/opt/llvm/bin/clang++ -DUSEARCH_BUILD_TEST_CPP=1 -DUSEARCH_BUILD_TEST_C=1 -DUSEARCH_BUILD_LIB_C=1 -DUSEARCH_USE_OPENMP=1 -DUSEARCH_USE_SIMSIMD=1 -DUSEARCH_USE_JEMALLOC=0 -DCMAKE_BUILD_TYPE=Release -B ./build_release && cmake --build ./build_release --config RelWithDebInfo",
+ "command": "cmake -DCMAKE_C_COMPILER=/opt/homebrew/opt/llvm/bin/clang -DCMAKE_CXX_COMPILER=/opt/homebrew/opt/llvm/bin/clang++ -DUSEARCH_BUILD_TEST_CPP=1 -DUSEARCH_BUILD_TEST_C=1 -DUSEARCH_BUILD_LIB_C=1 -DUSEARCH_USE_OPENMP=1 -DUSEARCH_USE_NUMKONG=1 -DUSEARCH_USE_JEMALLOC=0 -DCMAKE_BUILD_TYPE=Release -B ./build_release && cmake --build ./build_release --config RelWithDebInfo",
"args": [],
"type": "shell"
},
{
"label": "Windows Build Debug",
- "command": "cmake -DUSEARCH_BUILD_TEST_CPP=1 -DUSEARCH_BUILD_TEST_C=1 -DUSEARCH_BUILD_LIB_C=1 -DUSEARCH_USE_OPENMP=1 -DUSEARCH_USE_SIMSIMD=1 -DUSEARCH_USE_JEMALLOC=0 -DCMAKE_BUILD_TYPE=Debug -B ./build_debug && cmake --build ./build_debug --config Debug",
+ "command": "cmake -DUSEARCH_BUILD_TEST_CPP=1 -DUSEARCH_BUILD_TEST_C=1 -DUSEARCH_BUILD_LIB_C=1 -DUSEARCH_USE_OPENMP=1 -DUSEARCH_USE_NUMKONG=1 -DUSEARCH_USE_JEMALLOC=0 -DCMAKE_BUILD_TYPE=Debug -B ./build_debug && cmake --build ./build_debug --config Debug",
"args": [],
"type": "shell",
"problemMatcher": [
@@ -51,7 +51,7 @@
},
{
"label": "Windows Build Release",
- "command": "cmake -DUSEARCH_BUILD_TEST_CPP=1 -DUSEARCH_BUILD_TEST_C=1 -DUSEARCH_BUILD_LIB_C=1 -DUSEARCH_USE_OPENMP=1 -DUSEARCH_USE_SIMSIMD=1 -DUSEARCH_USE_JEMALLOC=0 -DCMAKE_BUILD_TYPE=Release -B ./build_release && cmake --build ./build_release --config RelWithDebInfo",
+ "command": "cmake -DUSEARCH_BUILD_TEST_CPP=1 -DUSEARCH_BUILD_TEST_C=1 -DUSEARCH_BUILD_LIB_C=1 -DUSEARCH_USE_OPENMP=1 -DUSEARCH_USE_NUMKONG=1 -DUSEARCH_USE_JEMALLOC=0 -DCMAKE_BUILD_TYPE=Release -B ./build_release && cmake --build ./build_release --config RelWithDebInfo",
"args": [],
"type": "shell",
"problemMatcher": [
@@ -72,7 +72,7 @@
"type": "swift",
"args": [
"build",
- "--build-tests",
+ "--build-tests"
],
"env": {},
"cwd": "${workspaceFolder}",
diff --git a/BENCHMARKS.md b/BENCHMARKS.md
index 2efcbe532..205981825 100644
--- a/BENCHMARKS.md
+++ b/BENCHMARKS.md
@@ -64,8 +64,8 @@ To achieve best highest results we suggest compiling locally for the target arch
```sh
git submodule update --init --recursive
-cmake -USEARCH_BUILD_BENCH_CPP=1 -DUSEARCH_BUILD_TEST_C=1 -DUSEARCH_USE_OPENMP=1 -DUSEARCH_USE_SIMSIMD=1 -DCMAKE_BUILD_TYPE=RelWithDebInfo -B build_profile
-cmake --build build_profile --config RelWithDebInfo -j
+cmake -DUSEARCH_BUILD_BENCH_CPP=1 -DUSEARCH_BUILD_TEST_C=1 -DUSEARCH_USE_NUMKONG=1 -DUSEARCH_USE_OPENMP=1 -DCMAKE_BUILD_TYPE=RelWithDebInfo -B build_profile
+cmake --build build_profile --config RelWithDebInfo --parallel
build_profile/bench_cpp --help
```
@@ -76,15 +76,14 @@ SYNOPSIS
build_profile/bench_cpp [--vectors ] [--queries ] [--neighbors ] [-o
] [-b] [-j ] [-c ] [--expansion-add
] [--expansion-search ] [--rows-skip ]
- [--rows-take ] [-bf16|-f16|-i8|-b1]
- [--ip|--l2sq|--cos|--hamming|--tanimoto|--sorensen|--haversine] [-h]
+ [--rows-take ] [--dtype ] [--metric ] [-h]
OPTIONS
--vectors
- .[fhbd]bin file path to construct the index
+ .[fhbd]bin, .i8bin, .u8bin, .f32bin file path to construct the index
--queries
- .[fhbd]bin file path to query the index
+ .[fhbd]bin, .i8bin, .u8bin, .f32bin file path to query the index
--neighbors
.ibin file path with ground truth
@@ -111,25 +110,12 @@ OPTIONS
--rows-take
Number of vectors to take
- -bf16, --bf16quant
- Enable `bf16_t` quantization
+ --dtype
+ Quantization type: f64, f32, bf16, f16, e5m2, e4m3, e3m2, e2m3, i8, u8, b1
- -f16, --f16quant
- Enable `f16_t` quantization
+ --metric
+ Distance metric: ip, l2sq, cos, hamming, tanimoto, sorensen, haversine
- -i8, --i8quant
- Enable `i8_t` quantization
-
- -b1, --b1quant
- Enable `b1x8_t` quantization
-
- --ip Choose Inner Product metric
- --l2sq Choose L2 Euclidean metric
- --cos Choose Angular metric
- --hamming Choose Hamming metric
- --tanimoto Choose Tanimoto metric
- --sorensen Choose Sorensen metric
- --haversine Choose Haversine metric
-h, --help Print this help information on this tool and exit
```
@@ -139,14 +125,15 @@ Here is an example of running the C++ benchmark:
build_profile/bench_cpp \
--vectors datasets/wiki_1M/base.1M.fbin \
--queries datasets/wiki_1M/query.public.100K.fbin \
- --neighbors datasets/wiki_1M/groundtruth.public.100K.ibin
+ --neighbors datasets/wiki_1M/groundtruth.public.100K.ibin \
+ --dtype bf16 --metric ip
build_profile/bench_cpp \
--vectors datasets/t2i_1B/base.1B.fbin \
--queries datasets/t2i_1B/query.public.100K.fbin \
--neighbors datasets/t2i_1B/groundtruth.public.100K.ibin \
--output datasets/t2i_1B/index.usearch \
- --cos
+ --dtype bf16 --metric cos
```
> Optional parameters include `connectivity`, `expansion_add`, `expansion_search`.
@@ -167,33 +154,80 @@ python/scripts/bench_cluster.py --help
BigANN benchmark is a good starting point, if you are searching for large collections of high-dimensional vectors.
Those often come with precomputed ground-truth neighbors, which is handy for recall evaluation.
+Datasets below are grouped by scale; only configurations with matching ground truth support recall evaluation.
+
+### ~1M Scale โ Development & Testing
+
+| Dataset | Scalar Type | Dimensions | Metric | Base Size | Ground Truth |
+| :----------------------------------------- | :---------: | :--------: | :----: | :-------: | :-----------------: |
+| [Unum UForm Wiki][unum-wiki-1m] | `f32` | 256 | IP | 1 GB | 100K queries, yes |
+| [Unum UForm Creative Captions][unum-cc-3m] | `f32` | 256 | IP | 3 GB | cross-modal pairing |
+| [Arxiv with E5][unum-arxiv-2m] | `f32` | 768 | IP | 6 GB | cross-modal pairing |
+
+### ~10M Scale
+
+| Dataset | Scalar Type | Dimensions | Metric | Base Size | Ground Truth |
+| :----------------------------------- | :---------: | :--------: | :----: | :-------: | :---------------: |
+| [Meta BIGANN (SIFT)][bigann] | `u8` | 128 | L2 | 1.2 GB | 10K queries, yes |
+| [Microsoft Turing-ANNS][msft-turing] | `f32` | 100 | L2 | 3.7 GB | 100K queries, yes |
+| [Yandex Deep][yandex-deep]           |    `f32`    |     96     |   L2   |  3.6 GB   |  ¹ no subset GT   |
-| Dataset | Scalar Type | Dimensions | Metric | Size |
-| :------------------------------------------ | :---------: | :--------: | :----: | :-------: |
-| [Unum UForm Creative Captions][unum-cc-3m] | `f32` | 256 | IP | 3 GB |
-| [Unum UForm Wiki][unum-wiki-1m] | `f32` | 256 | IP | 1 GB |
-| [Yandex Text-to-Image][yandex-t2i] subset | `f32` | 200 | Cos | 1 GB |
-| [Yandex Deep10M][yandex-deep] subset | `f32` | 96 | L2 | 358 GB |
-| [Microsoft SpaceV-100M][msft-spacev] subset | `i8` | 100 | L2 | 9.3 GB |
-| | | | | |
-| [Microsoft SpaceV-1B][msft-spacev] | `i8` | 100 | L2 | 93 GB |
-| [Microsoft Turing-ANNS][msft-turing] | `f32` | 100 | L2 | 373 GB |
-| [Yandex Deep1B][yandex-deep] | `f32` | 96 | L2 | 358 GB |
-| [Yandex Text-to-Image][t2i] | `f32` | 200 | Cos | 750 GB |
-| | | | | |
-| [ViT-L/12 LAION][laion] | `f32` | 2048 | Cos | 2 - 10 TB |
-
-Luckily, smaller samples of those datasets are available.
+> ¹ Yandex only publishes ground truth computed against the full 1B dataset. A `base.10M.fbin` exists for
+> download but using 1B ground truth with a subset will produce misleadingly low recall. Use it only for
+> throughput/latency testing, not recall evaluation.
+
+### ~100M Scale
+
+| Dataset | Scalar Type | Dimensions | Metric | Base Size | Ground Truth |
+| :----------------------------------- | :---------: | :--------: | :----: | :-------: | :---------------: |
+| [Meta BIGANN (SIFT)][bigann] | `u8` | 128 | L2 | 12 GB | 10K queries, yes |
+| [Microsoft Turing-ANNS][msft-turing] | `f32` | 100 | L2 | 37 GB | 100K queries, yes |
+| [Microsoft SpaceV][msft-spacev] | `i8` | 100 | L2 | 9.3 GB | 30K queries, yes |
+
+### ~1B Scale
+
+| Dataset | Scalar Type | Dimensions | Metric | Base Size | Ground Truth |
+| :----------------------------------- | :---------: | :--------: | :----: | :-------: | :---------------: |
+| [Meta BIGANN (SIFT)][bigann] | `u8` | 128 | L2 | 119 GB | 10K queries, yes |
+| [Microsoft Turing-ANNS][msft-turing] | `f32` | 100 | L2 | 373 GB | 100K queries, yes |
+| [Microsoft SpaceV][msft-spacev] | `i8` | 100 | L2 | 93 GB | 30K queries, yes |
+| [Yandex Text-to-Image][yandex-t2i] | `f32` | 200 | Cos | 750 GB | 100K queries, yes |
+| [Yandex Deep][yandex-deep] | `f32` | 96 | L2 | 358 GB | 10K queries, yes |
+| | | | | | |
+| [ViT-L/12 LAION][laion]              |    `f32`    |    2048    |  Cos   | 2 - 10 TB |         —         |
[unum-cc-3m]: https://huggingface.co/datasets/unum-cloud/ann-cc-3m
[unum-wiki-1m]: https://huggingface.co/datasets/unum-cloud/ann-wiki-1m
-[unum-t2i-1m]: https://huggingface.co/datasets/unum-cloud/ann-t2i-1m
+[unum-arxiv-2m]: https://huggingface.co/datasets/unum-cloud/ann-arxiv-2m
[msft-spacev]: https://github.com/ashvardanian/SpaceV
[msft-turing]: https://learning2hash.github.io/publications/microsoftturinganns1B/
[yandex-t2i]: https://research.yandex.com/blog/benchmarks-for-billion-scale-similarity-search
[yandex-deep]: https://research.yandex.com/blog/benchmarks-for-billion-scale-similarity-search
+[bigann]: https://dl.fbaipublicfiles.com/billion-scale-ann-benchmarks/bigann/
[laion]: https://laion.ai/blog/laion-5b/#download-the-data
+### Unum UForm Creative Captions
+
+A cross-modal dataset of ~2.75M image-text pairs embedded with UForm VL English (256 dimensions).
+No separate query/ground-truth files — the natural ground truth is the image-text pairing: `image[i]` matches `text[i]`.
+
+```sh
+mkdir -p datasets/cc_3M/ && \
+ wget -nc https://huggingface.co/datasets/unum-cloud/ann-cc-3m/resolve/main/images.uform-vl-english.fbin \
+ -O datasets/cc_3M/images.fbin && \
+ wget -nc https://huggingface.co/datasets/unum-cloud/ann-cc-3m/resolve/main/texts.uform-vl-english.fbin \
+ -O datasets/cc_3M/texts.fbin
+```
+
+To benchmark cross-modal join:
+
+```bash
+python python/scripts/join.py \
+ --vectors-a datasets/cc_3M/texts.fbin \
+ --vectors-b datasets/cc_3M/images.fbin \
+ --metric cos --diagnostics
+```
+
### Unum UForm Wiki
```sh
@@ -203,32 +237,205 @@ mkdir -p datasets/wiki_1M/ && \
wget -nc https://huggingface.co/datasets/unum-cloud/ann-wiki-1m/resolve/main/groundtruth.public.100K.ibin -P datasets/wiki_1M/
```
+### Arxiv with E5
+
+```sh
+mkdir -p datasets/arxiv_2M/ && \
+ wget -nc https://huggingface.co/datasets/unum-cloud/ann-arxiv-2m/resolve/main/abstract.e5-base-v2.fbin -P datasets/arxiv_2M/ &&
+ wget -nc https://huggingface.co/datasets/unum-cloud/ann-arxiv-2m/resolve/main/title.e5-base-v2.fbin -P datasets/arxiv_2M/
+```
+
### Yandex Text-to-Image
+> __Warning:__ Yandex only publishes ground truth computed against the full 1B dataset.
+> A `base.1M.fbin` subset exists for download but has no matching ground truth — using 1B ground truth
+> with the 1M subset will produce misleadingly low recall. Use `base.1M.fbin` only for throughput/latency
+> testing, not recall evaluation.
+
```sh
mkdir -p datasets/t2i_1B/ && \
wget -nc https://storage.yandexcloud.net/yandex-research/ann-datasets/T2I/base.1B.fbin -P datasets/t2i_1B/ &&
- wget -nc https://storage.yandexcloud.net/yandex-research/ann-datasets/T2I/base.1M.fbin -P datasets/t2i_1B/ &&
wget -nc https://storage.yandexcloud.net/yandex-research/ann-datasets/T2I/query.public.100K.fbin -P datasets/t2i_1B/ &&
wget -nc https://storage.yandexcloud.net/yandex-research/ann-datasets/T2I/groundtruth.public.100K.ibin -P datasets/t2i_1B/
```
-### Yandex Deep1B
+To run the benchmark (requires the full 1B base for valid recall):
+
+```bash
+build_profile/bench_cpp \
+ --vectors datasets/t2i_1B/base.1B.fbin \
+ --queries datasets/t2i_1B/query.public.100K.fbin \
+ --neighbors datasets/t2i_1B/groundtruth.public.100K.ibin \
+ --output datasets/t2i_1B/index.usearch \
+ --dtype bf16 --metric cos
+```
+
+### Yandex Deep
+
+> __Warning:__ Yandex only publishes ground truth computed against the full 1B dataset.
+> Smaller base files (`base.10M.fbin`, `base.1M.fbin`) are available for download but have no matching
+> ground truth — using 1B ground truth with a subset will produce misleadingly low recall. Use those files
+> only for throughput/latency testing, not recall evaluation.
```sh
mkdir -p datasets/deep_1B/ && \
wget -nc https://storage.yandexcloud.net/yandex-research/ann-datasets/DEEP/base.1B.fbin -P datasets/deep_1B/ &&
- wget -nc https://storage.yandexcloud.net/yandex-research/ann-datasets/DEEP/base.10M.fbin -P datasets/deep_1B/ &&
wget -nc https://storage.yandexcloud.net/yandex-research/ann-datasets/DEEP/query.public.10K.fbin -P datasets/deep_1B/ &&
wget -nc https://storage.yandexcloud.net/yandex-research/ann-datasets/DEEP/groundtruth.public.10K.ibin -P datasets/deep_1B/
```
-### Arxiv with E5
+To run the benchmark (requires the full 1B base for valid recall):
+
+```bash
+build_profile/bench_cpp \
+ --vectors datasets/deep_1B/base.1B.fbin \
+ --queries datasets/deep_1B/query.public.10K.fbin \
+ --neighbors datasets/deep_1B/groundtruth.public.10K.ibin \
+ --output datasets/deep_1B/index.usearch \
+ --dtype bf16 --metric l2sq
+```
+
+### Meta BIGANN — SIFT
+
+The full 1B dataset is available from Meta. No pre-sliced subset base files exist, so range requests are
+used to download only the first N vectors, followed by a header patch to update the vector count.
+Pre-computed ground truth is available for 10M and 100M subsets.
+
+#### 10M subset, ~1.2 GB
```sh
-mkdir -p datasets/arxiv_2M/ && \
- wget -nc https://huggingface.co/datasets/unum-cloud/ann-arxiv-2m/resolve/main/abstract.e5-base-v2.fbin -P datasets/arxiv_2M/ &&
- wget -nc https://huggingface.co/datasets/unum-cloud/ann-arxiv-2m/resolve/main/title.e5-base-v2.fbin -P datasets/arxiv_2M/
+mkdir -p datasets/sift_10M/ && \
+ wget -nc https://dl.fbaipublicfiles.com/billion-scale-ann-benchmarks/bigann/query.public.10K.u8bin -P datasets/sift_10M/ && \
+ wget -nc https://dl.fbaipublicfiles.com/billion-scale-ann-benchmarks/GT_10M/bigann-10M -O datasets/sift_10M/groundtruth.public.10K.ibin && \
+ wget --header="Range: bytes=0-1280000007" \
+ https://dl.fbaipublicfiles.com/billion-scale-ann-benchmarks/bigann/base.1B.u8bin \
+ -O datasets/sift_10M/base.10M.u8bin && \
+ python3 -c "
+import struct
+with open('datasets/sift_10M/base.10M.u8bin', 'r+b') as f:
+ f.write(struct.pack('I', 10_000_000))
+"
+```
+
+```bash
+build_profile/bench_cpp \
+ --vectors datasets/sift_10M/base.10M.u8bin \
+ --queries datasets/sift_10M/query.public.10K.u8bin \
+ --neighbors datasets/sift_10M/groundtruth.public.10K.ibin \
+ --output datasets/sift_10M/index.usearch \
+ --dtype u8 --metric l2sq
+```
+
+#### 100M subset, ~12 GB
+
+```sh
+mkdir -p datasets/sift_100M/ && \
+ wget -nc https://dl.fbaipublicfiles.com/billion-scale-ann-benchmarks/bigann/query.public.10K.u8bin -P datasets/sift_100M/ && \
+ wget -nc https://dl.fbaipublicfiles.com/billion-scale-ann-benchmarks/GT_100M/bigann-100M -O datasets/sift_100M/groundtruth.public.10K.ibin && \
+ wget --header="Range: bytes=0-12800000007" \
+ https://dl.fbaipublicfiles.com/billion-scale-ann-benchmarks/bigann/base.1B.u8bin \
+ -O datasets/sift_100M/base.100M.u8bin && \
+ python3 -c "
+import struct
+with open('datasets/sift_100M/base.100M.u8bin', 'r+b') as f:
+ f.write(struct.pack('I', 100_000_000))
+"
+```
+
+```bash
+build_profile/bench_cpp \
+ --vectors datasets/sift_100M/base.100M.u8bin \
+ --queries datasets/sift_100M/query.public.10K.u8bin \
+ --neighbors datasets/sift_100M/groundtruth.public.10K.ibin \
+ --output datasets/sift_100M/index.usearch \
+ --dtype u8 --metric l2sq
+```
+
+### Microsoft Turing-ANNS
+
+The full 1B dataset is ~373 GB of `f32` vectors with 100 dimensions.
+Subsets can be obtained via range requests, followed by a header patch to update the vector count.
+Pre-computed ground truth is available for 1M, 10M, and 100M subsets.
+
+#### 1M subset, ~400 MB
+
+```sh
+mkdir -p datasets/turing_1M/ && \
+ wget -nc https://comp21storage.z5.web.core.windows.net/comp21/MSFT-TURING-ANNS/query100K.fbin \
+ -O datasets/turing_1M/query.public.100K.fbin && \
+ wget -nc https://comp21storage.z5.web.core.windows.net/comp21/MSFT-TURING-ANNS/msturing-gt-1M \
+ -O datasets/turing_1M/groundtruth.public.100K.ibin && \
+ wget --header="Range: bytes=0-400000007" \
+ https://comp21storage.z5.web.core.windows.net/comp21/MSFT-TURING-ANNS/base1b.fbin \
+ -O datasets/turing_1M/base.1M.fbin && \
+ python3 -c "
+import struct
+with open('datasets/turing_1M/base.1M.fbin', 'r+b') as f:
+ f.write(struct.pack('I', 1_000_000))
+"
+```
+
+```bash
+build_profile/bench_cpp \
+ --vectors datasets/turing_1M/base.1M.fbin \
+ --queries datasets/turing_1M/query.public.100K.fbin \
+ --neighbors datasets/turing_1M/groundtruth.public.100K.ibin \
+ --output datasets/turing_1M/index.usearch \
+ --dtype bf16 --metric l2sq
+```
+
+#### 10M subset, ~3.7 GB
+
+```sh
+mkdir -p datasets/turing_10M/ && \
+ wget -nc https://comp21storage.z5.web.core.windows.net/comp21/MSFT-TURING-ANNS/query100K.fbin \
+ -O datasets/turing_10M/query.public.100K.fbin && \
+ wget -nc https://comp21storage.z5.web.core.windows.net/comp21/MSFT-TURING-ANNS/msturing-gt-10M \
+ -O datasets/turing_10M/groundtruth.public.100K.ibin && \
+ wget --header="Range: bytes=0-4000000007" \
+ https://comp21storage.z5.web.core.windows.net/comp21/MSFT-TURING-ANNS/base1b.fbin \
+ -O datasets/turing_10M/base.10M.fbin && \
+ python3 -c "
+import struct
+with open('datasets/turing_10M/base.10M.fbin', 'r+b') as f:
+ f.write(struct.pack('I', 10_000_000))
+"
+```
+
+```bash
+build_profile/bench_cpp \
+ --vectors datasets/turing_10M/base.10M.fbin \
+ --queries datasets/turing_10M/query.public.100K.fbin \
+ --neighbors datasets/turing_10M/groundtruth.public.100K.ibin \
+ --output datasets/turing_10M/index.usearch \
+ --dtype bf16 --metric l2sq
+```
+
+#### 100M subset, ~37 GB
+
+```sh
+mkdir -p datasets/turing_100M/ && \
+ wget -nc https://comp21storage.z5.web.core.windows.net/comp21/MSFT-TURING-ANNS/query100K.fbin \
+ -O datasets/turing_100M/query.public.100K.fbin && \
+ wget -nc https://comp21storage.z5.web.core.windows.net/comp21/MSFT-TURING-ANNS/msturing-gt-100M \
+ -O datasets/turing_100M/groundtruth.public.100K.ibin && \
+ wget --header="Range: bytes=0-40000000007" \
+ https://comp21storage.z5.web.core.windows.net/comp21/MSFT-TURING-ANNS/base1b.fbin \
+ -O datasets/turing_100M/base.100M.fbin && \
+ python3 -c "
+import struct
+with open('datasets/turing_100M/base.100M.fbin', 'r+b') as f:
+ f.write(struct.pack('I', 100_000_000))
+"
+```
+
+```bash
+build_profile/bench_cpp \
+ --vectors datasets/turing_100M/base.100M.fbin \
+ --queries datasets/turing_100M/query.public.100K.fbin \
+ --neighbors datasets/turing_100M/groundtruth.public.100K.ibin \
+ --output datasets/turing_100M/index.usearch \
+ --dtype bf16 --metric l2sq
```
### Microsoft SpaceV
@@ -251,16 +458,14 @@ mkdir -p datasets/spacev_100M/ && \
wget -nc https://huggingface.co/datasets/unum-cloud/ann-spacev-100m/resolve/main/groundtruth.30K.f32bin -P datasets/spacev_100M/
```
-To run the benchmark on those 100M datasets:
-
```bash
build_profile/bench_cpp \
--vectors datasets/spacev_100M/base.100M.i8bin \
--queries datasets/spacev_100M/query.30K.i8bin \
--neighbors datasets/spacev_100M/groundtruth.30K.i32bin \
--output datasets/spacev_100M/index.usearch \
- --i8quant \
- --l2sq
+ --dtype i8 \
+ --metric l2sq
```
## Profiling
diff --git a/CITATION.cff b/CITATION.cff
index bd942eddc..9bfe14d71 100644
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -8,4 +8,4 @@ title: "USearch by Unum Cloud"
version: 2.24.0
doi: 10.5281/zenodo.7949416
date-released: 2023-10-22
-url: "https://github.com/unum-cloud/usearch"
+url: "https://github.com/unum-cloud/USearch"
diff --git a/CMakeLists.txt b/CMakeLists.txt
index b6e4bb483..36181a005 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -6,7 +6,7 @@ project(
VERSION 2.24.0
LANGUAGES C CXX
DESCRIPTION "Smaller & Faster Single-File Vector Search Engine from Unum"
- HOMEPAGE_URL "https://github.com/unum-cloud/usearch"
+ HOMEPAGE_URL "https://github.com/unum-cloud/USearch"
)
# Determine if USearch is built as a subproject (using `add_subdirectory`) or if it is the main project
@@ -25,9 +25,8 @@ endif ()
option(USEARCH_INSTALL "Install CMake targets" OFF)
option(USEARCH_USE_OPENMP "Use OpenMP for a thread pool" OFF)
-option(USEARCH_USE_SIMSIMD "Use SimSIMD hardware-accelerated metrics" OFF)
+option(USEARCH_USE_NUMKONG "Use NumKong hardware-accelerated metrics" OFF)
option(USEARCH_USE_JEMALLOC "Use JeMalloc for faster memory allocations" OFF)
-option(USEARCH_USE_FP16LIB "Use software emulation for half-precision types" ON)
option(USEARCH_BUILD_TEST_CPP "Compile a native unit test in C++" ${USEARCH_IS_MAIN_PROJECT})
option(USEARCH_BUILD_BENCH_CPP "Compile a native benchmark in C++" ${USEARCH_IS_MAIN_PROJECT})
@@ -129,8 +128,7 @@ endif ()
target_compile_definitions(${USEARCH_TARGET_NAME} INTERFACE "USEARCH_USE_OPENMP=$")
# Supplementary compilation settings affecting "index_plugins.hpp"
-target_compile_definitions(${USEARCH_TARGET_NAME} INTERFACE "USEARCH_USE_FP16LIB=$")
-target_compile_definitions(${USEARCH_TARGET_NAME} INTERFACE "USEARCH_USE_SIMSIMD=$")
+target_compile_definitions(${USEARCH_TARGET_NAME} INTERFACE "USEARCH_USE_NUMKONG=$<BOOL:${USEARCH_USE_NUMKONG}>")
# Define which types can be compiled
target_compile_definitions(
@@ -150,19 +148,12 @@ target_include_directories(
${USEARCH_TARGET_NAME} ${USEARCH_SYSTEM_INCLUDE} INTERFACE $
$
)
-if (USEARCH_USE_FP16LIB)
- target_include_directories(
- ${USEARCH_TARGET_NAME} ${USEARCH_SYSTEM_INCLUDE} INTERFACE $
- $
- )
-endif ()
-if (USEARCH_USE_SIMSIMD)
+if (USEARCH_USE_NUMKONG)
target_include_directories(
${USEARCH_TARGET_NAME} ${USEARCH_SYSTEM_INCLUDE}
- INTERFACE $ $
+        INTERFACE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/numkong/include> $<INSTALL_INTERFACE:include>
)
endif ()
-
# Install a pkg-config file, so other tools can find this
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/cmake/pkg-config.pc.in" "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}.pc")
@@ -221,9 +212,7 @@ if (NOT CMAKE_BUILD_TYPE)
endif ()
# Include directories
-set(USEARCH_HEADER_INCLUDES "${CMAKE_CURRENT_SOURCE_DIR}/include" "${CMAKE_CURRENT_SOURCE_DIR}/fp16/include"
- "${CMAKE_CURRENT_SOURCE_DIR}/simsimd/include"
-)
+set(USEARCH_HEADER_INCLUDES "${CMAKE_CURRENT_SOURCE_DIR}/include" "${CMAKE_CURRENT_SOURCE_DIR}/numkong/include")
# Function to setup target
function (setup_target TARGET_NAME)
@@ -383,23 +372,27 @@ function (setup_target TARGET_NAME)
target_compile_definitions(${TARGET_NAME} PRIVATE "USEARCH_USE_OPENMP=$")
# Supplementary compilation settings affecting "index_plugins.hpp"
- target_compile_definitions(${TARGET_NAME} PRIVATE "USEARCH_USE_FP16LIB=$")
- target_compile_definitions(${TARGET_NAME} PRIVATE "USEARCH_USE_SIMSIMD=$")
+    target_compile_definitions(${TARGET_NAME} PRIVATE "USEARCH_USE_NUMKONG=$<BOOL:${USEARCH_USE_NUMKONG}>")
- # Pass through SIMSIMD target definitions if they're set
- foreach(target IN ITEMS HASWELL SKYLAKE ICE GENOA SAPPHIRE TURIN SIERRA NEON NEON_I8 NEON_F16 NEON_BF16 SVE SVE_I8 SVE_F16 SVE_BF16 SVE2)
- if(DEFINED SIMSIMD_TARGET_${target})
- target_compile_definitions(${TARGET_NAME} PRIVATE "SIMSIMD_TARGET_${target}=${SIMSIMD_TARGET_${target}}")
- endif()
- endforeach()
-
- # Pass through SIMSIMD_DYNAMIC_DISPATCH if set
- if(DEFINED SIMSIMD_DYNAMIC_DISPATCH)
- target_compile_definitions(${TARGET_NAME} PRIVATE "SIMSIMD_DYNAMIC_DISPATCH=${SIMSIMD_DYNAMIC_DISPATCH}")
- endif()
+ # Link NumKong: the `nk_shared` target from `add_subdirectory(numkong)` carries its own include paths, compile
+ # definitions (NK_DYNAMIC_DISPATCH, NK_TARGET_*), and ISA probes.
+ if (USEARCH_USE_NUMKONG AND TARGET nk_shared)
+ target_compile_definitions(${TARGET_NAME} PRIVATE "NK_DYNAMIC_DISPATCH=1")
+ target_link_libraries(${TARGET_NAME} PRIVATE nk_shared)
+ endif ()
endfunction ()
+# Delegate NumKong compilation to its own CMakeLists.txt — it handles ISA detection, NK_TARGET_* flags, and SIMD
+# dispatch backends internally.
+if (USEARCH_USE_NUMKONG)
+ set(NK_BUILD_SHARED
+ ON
+ CACHE BOOL "" FORCE
+ )
+ add_subdirectory(numkong)
+endif ()
+
# Must be called before "add_subdirectory()". See
# https://stackoverflow.com/questions/30250494/ctest-not-detecting-tests.
enable_testing()
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index c4cfc937f..07605fcc5 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -14,7 +14,7 @@ To keep the quality of the code high, we have a set of guidelines common to [all
## Before you start
Before building the first time, please pull `git` submodules.
-That's how we bring in SimSIMD and other optional dependencies to test all of the available functionality.
+That's how we bring in NumKong and other optional dependencies to test all of the available functionality.
```sh
git submodule update --init --recursive
@@ -26,7 +26,7 @@ Our primary C++ implementation uses CMake for builds.
If this is your first experience with CMake, use the following commands to get started:
```sh
-sudo apt-get update && sudo apt-get install cmake build-essential libjemalloc-dev g++-12 gcc-12 # Ubuntu
+sudo apt-get update && sudo apt-get install cmake build-essential libjemalloc-dev # Ubuntu
brew install libomp llvm # macOS
```
@@ -64,14 +64,13 @@ The CMakeLists.txt file has a number of options you can pass:
- `USEARCH_BUILD_SQLITE` - build the SQLite extension ([no Windows](https://gist.github.com/zeljic/d8b542788b225b1bcb5fce169ee28c55))
- Which dependencies to use:
- `USEARCH_USE_OPENMP` - use OpenMP for parallelism
- - `USEARCH_USE_SIMSIMD` - use SimSIMD for vectorization
+ - `USEARCH_USE_NUMKONG` - use NumKong for vectorization
- `USEARCH_USE_JEMALLOC` - use Jemalloc for memory management
- - `USEARCH_USE_FP16LIB` - use software emulation for half-precision floating point
Putting all of this together, compiling all targets on most platforms should work with the following snippet:
```sh
-cmake -D CMAKE_BUILD_TYPE=Release -D USEARCH_USE_FP16LIB=1 -D USEARCH_USE_OPENMP=1 -D USEARCH_USE_SIMSIMD=1 -D USEARCH_USE_JEMALLOC=1 -D USEARCH_BUILD_TEST_CPP=1 -D USEARCH_BUILD_BENCH_CPP=1 -D USEARCH_BUILD_LIB_C=1 -D USEARCH_BUILD_TEST_C=1 -D USEARCH_BUILD_SQLITE=0 -B build_release
+cmake -D CMAKE_BUILD_TYPE=Release -D USEARCH_USE_NUMKONG=1 -D USEARCH_USE_OPENMP=1 -D USEARCH_USE_JEMALLOC=1 -D USEARCH_BUILD_TEST_CPP=1 -D USEARCH_BUILD_BENCH_CPP=1 -D USEARCH_BUILD_LIB_C=1 -D USEARCH_BUILD_TEST_C=1 -D USEARCH_BUILD_SQLITE=0 -B build_release
cmake --build build_release --config Release
build_release/test_cpp
@@ -86,9 +85,8 @@ cmake \
-D CMAKE_BUILD_TYPE=Release \
-D CMAKE_C_COMPILER="$(brew --prefix llvm)/bin/clang" \
-D CMAKE_CXX_COMPILER="$(brew --prefix llvm)/bin/clang++" \
- -D USEARCH_USE_FP16LIB=1 \
+ -D USEARCH_USE_NUMKONG=1 \
-D USEARCH_USE_OPENMP=1 \
- -D USEARCH_USE_SIMSIMD=1 \
-D USEARCH_USE_JEMALLOC=1 \
-D USEARCH_BUILD_TEST_CPP=1 \
-D USEARCH_BUILD_BENCH_CPP=1 \
@@ -178,8 +176,7 @@ cmake -B build_artifacts \
-D USEARCH_BUILD_LIB_C=1 \
-D USEARCH_BUILD_TEST_CPP=0 \
-D USEARCH_BUILD_BENCH_CPP=0 \
- -D USEARCH_USE_SIMSIMD=0 \
- -D USEARCH_USE_FP16LIB=1
+ -D USEARCH_USE_NUMKONG=0
cmake --build build_artifacts --config RelWithDebInfo
file build_artifacts/libusearch_c.so # Verify the output
@@ -214,7 +211,7 @@ Following options are enabled:
- The `-p no:warnings` option will suppress and allow warnings.
```sh
-uv pip install pytest pytest-repeat numpy # for repeated fuzzy tests
+uv pip install -e . --group tests # for repeated fuzzy tests
python -m pytest # if you trust the default settings
python -m pytest python/scripts/ -s -x -p no:warnings # to overwrite the default settings
```
@@ -222,8 +219,8 @@ python -m pytest python/scripts/ -s -x -p no:warnings # to overwrite the default
Linting:
```sh
-pip install ruff
-ruff --format=github --select=E9,F63,F7,F82 --target-version=py310 python
+uv pip install -e . --group lint
+ruff check --output-format=github python
```
Before merging your changes you may want to test your changes against the entire matrix of Python versions USearch supports.
@@ -281,14 +278,13 @@ RUN npm init -y
RUN yum install tar git python3 cmake gcc-c++ -y && yum groupinstall "Development Tools" -y
# Assuming AWS Linux 2 uses old compilers:
-ENV USEARCH_USE_FP16LIB 1
-ENV USEARCH_USE_SIMSIMD 1
-ENV SIMSIMD_TARGET_HASWELL 1
-ENV SIMSIMD_TARGET_SKYLAKE 0
-ENV SIMSIMD_TARGET_ICE 0
-ENV SIMSIMD_TARGET_SAPPHIRE 0
-ENV SIMSIMD_TARGET_NEON 1
-ENV SIMSIMD_TARGET_SVE 0
+ENV USEARCH_USE_NUMKONG 1
+ENV NK_TARGET_HASWELL 1
+ENV NK_TARGET_SKYLAKE 0
+ENV NK_TARGET_ICELAKE 0
+ENV NK_TARGET_SAPPHIRE 0
+ENV NK_TARGET_NEON 1
+ENV NK_TARGET_SVE 0
# For specific PR:
# RUN npm install --build-from-source unum-cloud/usearch#pull/302/head
@@ -332,16 +328,16 @@ The reason for that is the heuristic that Cargo uses to determine the files to i
> Regardless of whether exclude or include is specified, the following files are always excluded:
> Any sub-packages will be skipped (any subdirectory that contains a Cargo.toml file).
-Assuming both SimSIMD and StringZilla contain their own `Cargo.toml` files, we need to temporarily exclude them from the package.
+Assuming both NumKong and StringZilla contain their own `Cargo.toml` files, we need to temporarily exclude them from the package.
```sh
-mv simsimd/Cargo.toml simsimd/Cargo.toml.bak
+mv numkong/Cargo.toml numkong/Cargo.toml.bak
mv stringzilla/Cargo.toml stringzilla/Cargo.toml.bak
cargo package --list --allow-dirty
cargo publish
# Revert back
-mv simsimd/Cargo.toml.bak simsimd/Cargo.toml
+mv numkong/Cargo.toml.bak numkong/Cargo.toml
mv stringzilla/Cargo.toml.bak stringzilla/Cargo.toml
```
@@ -388,7 +384,7 @@ USearch provides Go bindings, that depend on the C library that must be installe
So one should first compile the C library, link it with Go, and only then run tests.
```sh
-cmake -B build_release -D USEARCH_BUILD_LIB_C=1 -D USEARCH_BUILD_TEST_C=1 -D USEARCH_USE_OPENMP=1 -D USEARCH_USE_SIMSIMD=1
+cmake -B build_release -D USEARCH_BUILD_LIB_C=1 -D USEARCH_BUILD_TEST_C=1 -D USEARCH_USE_NUMKONG=1 -D USEARCH_USE_OPENMP=1
cmake --build build_release --config Release -j
cp build_release/libusearch_c.so golang/ # or .dylib to install the library on macOS
@@ -452,8 +448,7 @@ g++ -shared -o java\cloud\unum\usearch\USearchJNI.dll java\cloud\unum\usearch\cl
# macOS
g++ -std=c++11 -c -fPIC \
-Iinclude \
- -Ifp16/include \
- -Isimsimd/include \
+ -Inumkong/include \
-I${JAVA_HOME}/include -I${JAVA_HOME}/include/darwin java/cloud/unum/usearch/cloud_unum_usearch_Index.cpp -o java/cloud/unum/usearch/cloud_unum_usearch_Index.o
g++ -dynamiclib -o java/cloud/unum/usearch/libusearch.dylib java/cloud/unum/usearch/cloud_unum_usearch_Index.o -lc
@@ -480,7 +475,7 @@ USearch provides CSharp bindings, that depend on the C library that must be inst
So one should first compile the C library, link it with CSharp, and only then run tests.
```sh
-cmake -B build_artifacts -D USEARCH_BUILD_LIB_C=1 -D USEARCH_BUILD_TEST_C=1 -D USEARCH_USE_OPENMP=1 -D USEARCH_USE_SIMSIMD=1
+cmake -B build_artifacts -D USEARCH_BUILD_LIB_C=1 -D USEARCH_BUILD_TEST_C=1 -D USEARCH_USE_NUMKONG=1 -D USEARCH_USE_OPENMP=1
cmake --build build_artifacts --config Release -j
```
@@ -561,8 +556,8 @@ cmake -DCMAKE_TOOLCHAIN_FILE=${WASI_SDK_PATH}/share/cmake/wasi-sdk.cmake .
## Working on Sub-Modules
-Extending metrics in SimSIMD:
+Extending metrics in NumKong:
```sh
-git push --set-upstream https://github.com/ashvardanian/simsimd.git HEAD:main
+git push --set-upstream https://github.com/ashvardanian/numkong.git HEAD:main
```
diff --git a/Cargo.lock b/Cargo.lock
deleted file mode 100644
index ee7e3d409..000000000
--- a/Cargo.lock
+++ /dev/null
@@ -1,495 +0,0 @@
-# This file is automatically @generated by Cargo.
-# It is not intended for manual editing.
-version = 4
-
-[[package]]
-name = "anstyle"
-version = "1.0.11"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd"
-
-[[package]]
-name = "autocfg"
-version = "1.5.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
-
-[[package]]
-name = "bitflags"
-version = "2.9.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967"
-
-[[package]]
-name = "cc"
-version = "1.2.29"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5c1599538de2394445747c8cf7935946e3cc27e9625f889d979bfb2aaf569362"
-dependencies = [
- "shlex",
-]
-
-[[package]]
-name = "cfg-if"
-version = "1.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268"
-
-[[package]]
-name = "clap"
-version = "4.5.40"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "40b6887a1d8685cebccf115538db5c0efe625ccac9696ad45c409d96566e910f"
-dependencies = [
- "clap_builder",
-]
-
-[[package]]
-name = "clap_builder"
-version = "4.5.40"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e0c66c08ce9f0c698cbce5c0279d0bb6ac936d8674174fe48f736533b964f59e"
-dependencies = [
- "anstyle",
- "clap_lex",
- "strsim",
-]
-
-[[package]]
-name = "clap_lex"
-version = "0.7.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675"
-
-[[package]]
-name = "codespan-reporting"
-version = "0.12.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fe6d2e5af09e8c8ad56c969f2157a3d4238cebc7c55f0a517728c38f7b200f81"
-dependencies = [
- "serde",
- "termcolor",
- "unicode-width",
-]
-
-[[package]]
-name = "cxx"
-version = "1.0.160"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "be1149bab7a5580cb267215751389597c021bfad13c0bb00c54e19559333764c"
-dependencies = [
- "cc",
- "cxxbridge-cmd",
- "cxxbridge-flags",
- "cxxbridge-macro",
- "foldhash",
- "link-cplusplus",
-]
-
-[[package]]
-name = "cxx-build"
-version = "1.0.160"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6aeeaf1aefae8e0f5141920a7ecbc64a22ab038d4b4ac59f2d19e0effafd5b53"
-dependencies = [
- "cc",
- "codespan-reporting",
- "indexmap",
- "proc-macro2",
- "quote",
- "scratch",
- "syn",
-]
-
-[[package]]
-name = "cxxbridge-cmd"
-version = "1.0.160"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c36ac1f9a72064b1f41fd7b49a4c1b3bf33b9ccb1274874dda6d264f57c55964"
-dependencies = [
- "clap",
- "codespan-reporting",
- "indexmap",
- "proc-macro2",
- "quote",
- "syn",
-]
-
-[[package]]
-name = "cxxbridge-flags"
-version = "1.0.160"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "170c6ff5d009663866857a91ebee55b98ea4d4b34e7d7aba6dc4a4c95cc7b748"
-
-[[package]]
-name = "cxxbridge-macro"
-version = "1.0.160"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4984a142211026786011a7e79fa22faa1eca1e9cbf0e60bffecfd57fd3db88f1"
-dependencies = [
- "indexmap",
- "proc-macro2",
- "quote",
- "rustversion",
- "syn",
-]
-
-[[package]]
-name = "equivalent"
-version = "1.0.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
-
-[[package]]
-name = "foldhash"
-version = "0.1.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
-
-[[package]]
-name = "fork_union"
-version = "2.1.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ec1c7f61ff17f21416e5fece8704d86ca9c9b09d189c176ce9067e2b75f39b38"
-dependencies = [
- "cc",
-]
-
-[[package]]
-name = "getrandom"
-version = "0.3.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4"
-dependencies = [
- "cfg-if",
- "libc",
- "r-efi",
- "wasi",
-]
-
-[[package]]
-name = "hashbrown"
-version = "0.15.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5"
-
-[[package]]
-name = "indexmap"
-version = "2.10.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fe4cd85333e22411419a0bcae1297d25e58c9443848b11dc6a86fefe8c78a661"
-dependencies = [
- "equivalent",
- "hashbrown",
-]
-
-[[package]]
-name = "libc"
-version = "0.2.174"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776"
-
-[[package]]
-name = "libm"
-version = "0.2.15"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de"
-
-[[package]]
-name = "link-cplusplus"
-version = "1.0.10"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4a6f6da007f968f9def0d65a05b187e2960183de70c160204ecfccf0ee330212"
-dependencies = [
- "cc",
-]
-
-[[package]]
-name = "num-traits"
-version = "0.2.19"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
-dependencies = [
- "autocfg",
- "libm",
-]
-
-[[package]]
-name = "ppv-lite86"
-version = "0.2.21"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9"
-dependencies = [
- "zerocopy",
-]
-
-[[package]]
-name = "proc-macro2"
-version = "1.0.95"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778"
-dependencies = [
- "unicode-ident",
-]
-
-[[package]]
-name = "quote"
-version = "1.0.40"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d"
-dependencies = [
- "proc-macro2",
-]
-
-[[package]]
-name = "r-efi"
-version = "5.3.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
-
-[[package]]
-name = "rand"
-version = "0.9.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97"
-dependencies = [
- "rand_chacha",
- "rand_core",
-]
-
-[[package]]
-name = "rand_chacha"
-version = "0.9.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb"
-dependencies = [
- "ppv-lite86",
- "rand_core",
-]
-
-[[package]]
-name = "rand_core"
-version = "0.9.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38"
-dependencies = [
- "getrandom",
-]
-
-[[package]]
-name = "rand_distr"
-version = "0.5.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6a8615d50dcf34fa31f7ab52692afec947c4dd0ab803cc87cb3b0b4570ff7463"
-dependencies = [
- "num-traits",
- "rand",
-]
-
-[[package]]
-name = "rustversion"
-version = "1.0.21"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d"
-
-[[package]]
-name = "scratch"
-version = "1.0.8"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9f6280af86e5f559536da57a45ebc84948833b3bee313a7dd25232e09c878a52"
-
-[[package]]
-name = "serde"
-version = "1.0.219"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6"
-dependencies = [
- "serde_derive",
-]
-
-[[package]]
-name = "serde_derive"
-version = "1.0.219"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn",
-]
-
-[[package]]
-name = "shlex"
-version = "1.3.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
-
-[[package]]
-name = "strsim"
-version = "0.11.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
-
-[[package]]
-name = "syn"
-version = "2.0.104"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40"
-dependencies = [
- "proc-macro2",
- "quote",
- "unicode-ident",
-]
-
-[[package]]
-name = "termcolor"
-version = "1.4.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755"
-dependencies = [
- "winapi-util",
-]
-
-[[package]]
-name = "unicode-ident"
-version = "1.0.18"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"
-
-[[package]]
-name = "unicode-width"
-version = "0.2.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4a1a07cc7db3810833284e8d372ccdc6da29741639ecc70c9ec107df0fa6154c"
-
-[[package]]
-name = "usearch"
-version = "2.24.0"
-dependencies = [
- "cxx",
- "cxx-build",
- "fork_union",
- "rand",
- "rand_chacha",
- "rand_distr",
-]
-
-[[package]]
-name = "wasi"
-version = "0.14.2+wasi-0.2.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3"
-dependencies = [
- "wit-bindgen-rt",
-]
-
-[[package]]
-name = "winapi-util"
-version = "0.1.9"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
-dependencies = [
- "windows-sys",
-]
-
-[[package]]
-name = "windows-sys"
-version = "0.59.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
-dependencies = [
- "windows-targets",
-]
-
-[[package]]
-name = "windows-targets"
-version = "0.52.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
-dependencies = [
- "windows_aarch64_gnullvm",
- "windows_aarch64_msvc",
- "windows_i686_gnu",
- "windows_i686_gnullvm",
- "windows_i686_msvc",
- "windows_x86_64_gnu",
- "windows_x86_64_gnullvm",
- "windows_x86_64_msvc",
-]
-
-[[package]]
-name = "windows_aarch64_gnullvm"
-version = "0.52.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
-
-[[package]]
-name = "windows_aarch64_msvc"
-version = "0.52.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
-
-[[package]]
-name = "windows_i686_gnu"
-version = "0.52.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
-
-[[package]]
-name = "windows_i686_gnullvm"
-version = "0.52.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
-
-[[package]]
-name = "windows_i686_msvc"
-version = "0.52.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
-
-[[package]]
-name = "windows_x86_64_gnu"
-version = "0.52.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
-
-[[package]]
-name = "windows_x86_64_gnullvm"
-version = "0.52.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
-
-[[package]]
-name = "windows_x86_64_msvc"
-version = "0.52.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
-
-[[package]]
-name = "wit-bindgen-rt"
-version = "0.39.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1"
-dependencies = [
- "bitflags",
-]
-
-[[package]]
-name = "zerocopy"
-version = "0.8.26"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f"
-dependencies = [
- "zerocopy-derive",
-]
-
-[[package]]
-name = "zerocopy-derive"
-version = "0.8.26"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn",
-]
diff --git a/Cargo.toml b/Cargo.toml
index a6c93838f..5f4043481 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,41 +1,41 @@
[package]
-name = "usearch"
-version = "2.24.0"
authors = ["Ash Vardanian <1983160+ashvardanian@users.noreply.github.com>"]
description = "Smaller & Faster Single-File Vector Search Engine from Unum"
+documentation = "https://unum-cloud.github.io/USearch"
edition = "2021"
-license = "Apache-2.0"
-publish = true
-repository = "https://github.com/unum-cloud/usearch"
-documentation = "https://unum-cloud.github.io/usearch"
include = [
- "/rust/**",
- "/include/**",
- "/fp16/include/**",
- "/simsimd/include/**",
- "/simsimd/c/**",
- "/stringzilla/include/**",
- "/build.rs",
+ "/rust/**",
+ "/include/**",
+ "/stringzilla/include/**",
+ "/build.rs",
]
+license = "Apache-2.0"
+name = "usearch"
+publish = true
+repository = "https://github.com/unum-cloud/USearch"
+version = "2.24.0"
[features]
-default = ["simsimd", "fp16lib"] # SimSIMD is enabled by default
-simsimd = [] # No need to do anything to enable SimSIMD by default
-fp16lib = [] # Without this FP16 we lose precision downcasting
-openmp = [] # Optional: Users can enable OpenMP
+default = ["numkong"]
+numkong = ["dep:numkong"]
+openmp = []
+simsimd = ["numkong"]
[lib]
name = "usearch"
path = "rust/lib.rs"
[dependencies]
-cxx = "1.0.160"
+cxx = "1.0.194"
+# Published crates pull `numkong` from crates.io (the `path` is stripped at publish time).
+# The local `path = "numkong"` is only used during development against the git submodule.
+numkong = {version = ">=7.5.0", path = "numkong", optional = true}
[build-dependencies]
-cxx-build = "1.0.160"
+cxx-build = "1.0.194"
[dev-dependencies]
-fork_union = "2.1.1" # for concurrency tests
-rand_distr = "0.5.1" # uniform floats distribution
-rand_chacha = "0.9.0" # random number generator
-rand = "0.9.1"
+fork_union = "2.1.1" # for concurrency tests
+rand = "0.10.0"
+rand_chacha = "0.10.0" # random number generator
+rand_distr = "0.6.0" # uniform floats distribution
diff --git a/Package.swift b/Package.swift
index 646dff293..95da96c9c 100644
--- a/Package.swift
+++ b/Package.swift
@@ -4,10 +4,10 @@ import PackageDescription
let cxxSettings: [CXXSetting] = [
.headerSearchPath("../include/"),
- .headerSearchPath("../fp16/include/"),
- .headerSearchPath("../simsimd/include/"),
- .define("USEARCH_USE_FP16LIB", to: "1"),
- .define("USEARCH_USE_SIMSIMD", to: "1"),
+ .define("USEARCH_USE_NUMKONG", to: "1"),
+ .define("NK_DYNAMIC_DISPATCH", to: "1"),
+ .define("NK_NATIVE_F16", to: "0"),
+ .define("NK_NATIVE_BF16", to: "0"),
]
var targets: [Target] = []
@@ -17,8 +17,11 @@ var targets: [Target] = []
targets.append(
.target(
name: "USearchObjectiveC",
+ dependencies: [
+ .product(name: "CNumKongDispatch", package: "NumKong"),
+ ],
path: "objc",
- sources: ["USearchObjective.mm", "../simsimd/c/lib.c"],
+ sources: ["USearchObjective.mm"],
cxxSettings: cxxSettings
)
)
@@ -28,6 +31,9 @@ var targets: [Target] = []
targets += [
.target(
name: "USearchC",
+ dependencies: [
+ .product(name: "CNumKongDispatch", package: "NumKong"),
+ ],
path: "c",
sources: ["usearch.h", "lib.cpp"],
publicHeadersPath: ".",
@@ -70,8 +76,16 @@ products.append(
let package = Package(
name: "USearch",
+ platforms: [
+ .macOS(.v12),
+ .iOS(.v15),
+ .watchOS(.v8),
+ .tvOS(.v15),
+ ],
products: products,
- dependencies: [],
+ dependencies: [
+ .package(url: "https://github.com/ashvardanian/NumKong", from: "7.5.0"),
+ ],
targets: targets,
cxxLanguageStandard: CXXLanguageStandard.cxx11
)
diff --git a/README.md b/README.md
index 147a93d25..0a1319d99 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
USearch
Smaller & Faster Single-File
-Similarity Search & Clustering Engine for Vectors & 🔍 Texts
+Similarity Search & Clustering Engine for Vectors & 🔍 Texts
@@ -14,7 +14,7 @@ Similarity Search & Clustering Engine for
-
+
@@ -61,7 +61,7 @@ Linux • macOS • Windows • iOS • Android • WebAssembly •
- ๐ Near-real-time [clustering and sub-clustering](#clustering) for Tens or Millions of clusters.
[faiss]: https://github.com/facebookresearch/faiss
-[usearch-header]: https://github.com/unum-cloud/usearch/blob/main/include/usearch/index.hpp
+[usearch-header]: https://github.com/unum-cloud/USearch/blob/main/include/usearch/index.hpp
[obscure-use-cases]: https://ashvardanian.com/posts/abusing-vector-search
[hnsw-algorithm]: https://arxiv.org/abs/1603.09320
[simd]: https://en.wikipedia.org/wiki/Single_instruction,_multiple_data
@@ -185,8 +185,8 @@ one_in_many: Matches = search(vectors, vector, 50, MetricKind.L2sq, exact=True)
many_in_many: BatchMatches = search(vectors, vectors, 50, MetricKind.L2sq, exact=True)
```
-If you pass the `exact=True` argument, the system bypasses indexing altogether and performs a brute-force search through the entire dataset using SIMD-optimized similarity metrics from [SimSIMD](https://github.com/ashvardanian/simsimd).
-When compared to FAISS's `IndexFlatL2` in Google Colab, __[USearch may offer up to a 20x performance improvement](https://github.com/unum-cloud/usearch/issues/176#issuecomment-1666650778)__:
+If you pass the `exact=True` argument, the system bypasses indexing altogether and performs a brute-force search through the entire dataset using SIMD-optimized similarity metrics from [NumKong](https://github.com/ashvardanian/numkong).
+When compared to FAISS's `IndexFlatL2` in Google Colab, __[USearch may offer up to a 20x performance improvement](https://github.com/unum-cloud/USearch/issues/176#issuecomment-1666650778)__:
- `faiss.IndexFlatL2`: __55.3 ms__.
- `usearch.index.search`: __2.54 ms__.
@@ -268,7 +268,7 @@ As part of the quantization process, the vectors are normalized to unit length a
When quantizing to `b1x8_t` single-bit representations, note that it's only valid for binary metrics like Jaccard, Hamming, etc.
As part of the quantization process, the scalar components greater than zero are set to `true`, and the rest to `false`.
-
+
Using smaller numeric types will save you RAM needed to store the vectors, but you can also compress the neighbors lists forming our proximity graphs.
By default, 32-bit `uint32_t` is used to enumerate those, which is not enough if you need to address over 4 Billion entries.
@@ -533,15 +533,15 @@ index = Index(ndim=ndim, metric=CompiledMetric(
- [x] ScyllaDB: [Rust](https://github.com/scylladb/vector-store), [presentation](https://www.slideshare.net/slideshow/vector-search-with-scylladb-by-szymon-wasik/276571548).
- [x] TiDB & TiFlash: [C++](https://github.com/pingcap/tiflash), [announcement](https://www.pingcap.com/article/introduce-vector-search-indexes-in-tidb/).
- [x] YugaByte: [C++](https://github.com/yugabyte/yugabyte-db/blob/366b9f5e3c4df3a1a17d553db41d6dc50146f488/src/yb/vector_index/usearch_wrapper.cc).
-- [x] Google: [UniSim](https://github.com/google/unisim), [RetSim](https://arxiv.org/abs/2311.17264) paper.
- [x] MemGraph: [C++](https://github.com/memgraph/memgraph/blob/784dd8520f65050d033aea8b29446e84e487d091/src/storage/v2/indices/vector_index.cpp), [announcement](https://memgraph.com/blog/simplify-data-retrieval-memgraph-vector-search).
-- [x] LanternDB: [C++](https://github.com/lanterndata/lantern), [Rust](https://github.com/lanterndata/lantern_extras), [docs](https://lantern.dev/blog/hnsw-index-creation).
+- [x] Google: [UniSim](https://github.com/google/unisim), [RetSim](https://arxiv.org/abs/2311.17264) paper.
- [x] LangChain: [Python](https://github.com/langchain-ai/langchain/releases/tag/v0.0.257) and [JavaScript](https://github.com/hwchase17/langchainjs/releases/tag/0.0.125).
- [x] Microsoft Semantic Kernel: [Python](https://github.com/microsoft/semantic-kernel/releases/tag/python-0.3.9.dev) and C#.
- [x] GPTCache: [Python](https://github.com/zilliztech/GPTCache/releases/tag/0.1.29).
- [x] Sentence-Transformers: Python [docs](https://www.sbert.net/docs/package_reference/quantization.html#sentence_transformers.quantization.semantic_search_usearch).
- [x] Pathway: [Rust](https://github.com/pathwaycom/pathway).
- [x] Vald: [GoLang](https://github.com/vdaas/vald).
+- [x] MatrixOne: [GoLang](https://github.com/matrixorigin/matrixone).
## Citations
@@ -551,7 +551,7 @@ index = Index(ndim=ndim, metric=CompiledMetric(
doi = {10.5281/zenodo.7949416},
author = {Vardanian, Ash},
title = {{USearch by Unum Cloud}},
-url = {https://github.com/unum-cloud/usearch},
+url = {https://github.com/unum-cloud/USearch},
version = {2.24.0},
year = {2023},
month = oct,
diff --git a/binding.gyp b/binding.gyp
index dfe10d820..c62b5b869 100644
--- a/binding.gyp
+++ b/binding.gyp
@@ -1,8 +1,16 @@
{
+ "variables": {
+ "numkong_root": "./numkong",
+ },
+ "includes": ["./numkong/numkong.gypi"],
"targets": [
{
"target_name": "usearch",
- "sources": ["javascript/lib.cpp", "simsimd/c/lib.c"],
+ "sources": ["javascript/lib.cpp"],
+ "dependencies": [
+ " Result<(), Box> {
let mut build = cxx_build::bridge("rust/lib.rs");
build
@@ -6,59 +12,45 @@ fn main() {
.flag_if_supported("-Wno-unknown-pragmas")
.warnings(false)
.include("include")
- .include("rust")
- .include("fp16/include")
- .include("simsimd/include");
+ .include("rust");
// Check for optional features
if cfg!(feature = "openmp") {
build.define("USEARCH_USE_OPENMP", "1");
+ let target_os = std::env::var("CARGO_CFG_TARGET_OS").unwrap_or_default();
+ if target_os == "windows" {
+ build.flag_if_supported("/openmp");
+ } else {
+ build.flag_if_supported("-fopenmp");
+ println!("cargo:rustc-link-lib=dylib=omp");
+ }
} else {
build.define("USEARCH_USE_OPENMP", "0");
}
- if cfg!(feature = "fp16lib") {
- build.define("USEARCH_USE_FP16LIB", "1");
- } else {
- build.define("USEARCH_USE_FP16LIB", "0");
- }
-
- // Define all possible SIMD targets as 1
- let target_arch = std::env::var("CARGO_CFG_TARGET_ARCH").unwrap_or_default();
-
- let mut flags_to_try;
- if cfg!(feature = "simsimd") {
+ // When the `numkong` feature is enabled, the `numkong` crate (pulled from crates.io,
+ // not the local git submodule) compiles all SIMD kernels itself, with dynamic dispatch
+ // and fallback across ISA backends. We only need its include path for the C++ headers.
+ if cfg!(feature = "numkong") {
+ let numkong_include = std::env::var("DEP_NUMKONG_INCLUDE")
+ .map_err(|_| "numkong crate must set DEP_NUMKONG_INCLUDE via `links` metadata")?;
build
- .file("simsimd/c/lib.c")
- .define("USEARCH_USE_SIMSIMD", "1")
- .define("SIMSIMD_DYNAMIC_DISPATCH", "1")
- .define("SIMSIMD_NATIVE_BF16", "0")
- .define("SIMSIMD_NATIVE_F16", "0");
- flags_to_try = match target_arch.as_str() {
- "arm" | "aarch64" => vec![
- "SIMSIMD_TARGET_NEON",
- "SIMSIMD_TARGET_NEON_I8",
- "SIMSIMD_TARGET_NEON_F16",
- "SIMSIMD_TARGET_NEON_BF16",
- "SIMSIMD_TARGET_SVE",
- "SIMSIMD_TARGET_SVE_I8",
- "SIMSIMD_TARGET_SVE_F16",
- "SIMSIMD_TARGET_SVE_BF16",
- ],
- _ => vec![
- "SIMSIMD_TARGET_HASWELL",
- "SIMSIMD_TARGET_SKYLAKE",
- "SIMSIMD_TARGET_ICE",
- "SIMSIMD_TARGET_GENOA",
- "SIMSIMD_TARGET_SAPPHIRE",
- ],
- };
+ .include(&numkong_include)
+ .define("USEARCH_USE_NUMKONG", "1")
+ .define("NK_DYNAMIC_DISPATCH", "1")
+ .define("NK_NATIVE_BF16", "0")
+ .define("NK_NATIVE_F16", "0");
+
+ // Link the NumKong static library compiled by the numkong crate. Cargo propagates
+ // the library search path via `links` metadata, but doesn't re-emit `-lnumkong`
+ // for downstream native code. Our C++ libusearch.a references NumKong symbols
+ // (nk_find_kernel_punned, nk_capabilities), so we must link explicitly.
+ println!("cargo:rustc-link-lib=static=numkong");
} else {
- build.define("USEARCH_USE_SIMSIMD", "0");
- flags_to_try = vec![];
+ build.define("USEARCH_USE_NUMKONG", "0");
}
- let target_os = std::env::var("CARGO_CFG_TARGET_OS").unwrap();
+ let target_os = std::env::var("CARGO_CFG_TARGET_OS")?;
// Conditional compilation depending on the target operating system.
if target_os == "linux" || target_os == "android" {
build
@@ -89,33 +81,7 @@ fn main() {
.define("_ALLOW_POINTER_TO_CONST_MISMATCH", None);
}
- let base_build = build.clone();
-
- let mut pop_flag = None;
- loop {
- let mut sub_build = base_build.clone();
- for flag in &flags_to_try {
- sub_build.define(flag, "1");
- }
- let result = sub_build.try_compile("usearch");
- if result.is_err() {
- if let Some(flag) = pop_flag {
- println!(
- "cargo:warning=Failed to compile after disabling {:?}, trying next configuration...",
- flag
- );
- } else if !flags_to_try.is_empty() {
- print!("cargo:warning=Failed to compile with all SIMD backends...");
- }
-
- pop_flag = flags_to_try.pop();
- if pop_flag.is_none() {
- result.unwrap();
- }
- } else {
- break;
- }
- }
+ build.try_compile("usearch")?;
println!("cargo:rerun-if-changed=rust/lib.rs");
println!("cargo:rerun-if-changed=rust/lib.cpp");
@@ -123,4 +89,5 @@ fn main() {
println!("cargo:rerun-if-changed=include/index_plugins.hpp");
println!("cargo:rerun-if-changed=include/index_dense.hpp");
println!("cargo:rerun-if-changed=include/usearch/index.hpp");
+ Ok(())
}
diff --git a/c/CMakeLists.txt b/c/CMakeLists.txt
index dcdd39dbe..5e10c405d 100644
--- a/c/CMakeLists.txt
+++ b/c/CMakeLists.txt
@@ -1,9 +1,4 @@
-
-# Define the source files conditionally
set(USEARCH_LIB_SOURCES lib.cpp)
-if (USEARCH_USE_SIMSIMD)
- list(APPEND USEARCH_LIB_SOURCES ../simsimd/c/lib.c)
-endif ()
if (USEARCH_BUILD_TEST_C)
add_executable(test_c test.c ${USEARCH_LIB_SOURCES})
@@ -11,6 +6,10 @@ if (USEARCH_BUILD_TEST_C)
include(CTest)
enable_testing()
add_test(NAME test_c COMMAND test_c)
+
+ # Export the dynamic symbol table so `backtrace_symbols_fd` can resolve
+ # function names when the in-test crash handler fires.
+ set_target_properties(test_c PROPERTIES ENABLE_EXPORTS ON)
endif ()
# This article discusses a better way to allow building either static or shared libraries:
diff --git a/c/README.md b/c/README.md
index 3dac28a14..c47e4a507 100644
--- a/c/README.md
+++ b/c/README.md
@@ -3,7 +3,7 @@
## Installation
USearch vector-search engine can be integrated into your project using CMake.
-Alternatively, you can download one of the precompiled binaries from the [releases page](https://github.com/unum-cloud/usearch/releases).
+Alternatively, you can download one of the precompiled binaries from the [releases page](https://github.com/unum-cloud/USearch/releases).
## Quickstart
@@ -89,7 +89,7 @@ usearch_metadata_buffer(buffer, bytes, &opts, &error);
## Metrics
-USearch comes pre-packaged with SimSIMD, bringing over 100 SIMD-accelerated distance kernels for x86 and ARM architectures.
+USearch comes pre-packaged with NumKong, bringing over 100 SIMD-accelerated distance kernels for x86 and ARM architectures.
That includes:
- `usearch_metric_cos_k` - Cosine Similarity metric, defined as `Cos = 1 - sum(a[i] * b[i]) / (sqrt(sum(a[i]^2) * sqrt(sum(b[i]^2)))`.
@@ -107,7 +107,7 @@ That includes:
You can also define your own metrics by implementing the `usearch_metric_t` interface:
```c
-simsimd_distance_t callback(void const* a, void const* b, void* state) {
+usearch_distance_t callback(void const* a, void const* b, void* state) {
// Your custom metric implementation here
}
@@ -183,15 +183,15 @@ Alternatively, you can benefit from faster thread-pools and priority queues for
size_t threads = 0;
size_t top_k = 10;
size_t dataset_count = 1000, queries_count = 10;
-simsimd_f16_t dataset[dataset_count][dimensions];
-simsimd_f16_t queries[queries_count][dimensions];
+nk_f16_t dataset[dataset_count][dimensions];
+nk_f16_t queries[queries_count][dimensions];
usearch_key_t resulting_keys[queries_count][top_k];
usearch_distance_t resulting_distances[queries_count][top_k];
usearch_exact_search(
- &dataset[0][0], dataset_count, dimensions * sizeof(simsimd_f16_t),
- &queries[0][0], queries_count, dimensions * sizeof(simsimd_f16_t),
+ &dataset[0][0], dataset_count, dimensions * sizeof(nk_f16_t),
+ &queries[0][0], queries_count, dimensions * sizeof(nk_f16_t),
usearch_scalar_f16_k, top_k, threads,
&resulting_keys[0][0], sizeof(usearch_key_t) * top_k,
&resulting_distances[0][0], sizeof(usearch_distance_t) * top_k,
diff --git a/c/lib.cpp b/c/lib.cpp
index fa1b8a023..05f54bbba 100644
--- a/c/lib.cpp
+++ b/c/lib.cpp
@@ -56,11 +56,16 @@ usearch_metric_kind_t metric_kind_to_c(metric_kind_t kind) {
}
scalar_kind_t scalar_kind_to_cpp(usearch_scalar_kind_t kind) {
switch (kind) {
- case usearch_scalar_f32_k: return scalar_kind_t::f32_k;
case usearch_scalar_f64_k: return scalar_kind_t::f64_k;
- case usearch_scalar_f16_k: return scalar_kind_t::f16_k;
+ case usearch_scalar_f32_k: return scalar_kind_t::f32_k;
case usearch_scalar_bf16_k: return scalar_kind_t::bf16_k;
+ case usearch_scalar_f16_k: return scalar_kind_t::f16_k;
+ case usearch_scalar_e5m2_k: return scalar_kind_t::e5m2_k;
+ case usearch_scalar_e4m3_k: return scalar_kind_t::e4m3_k;
+ case usearch_scalar_e3m2_k: return scalar_kind_t::e3m2_k;
+ case usearch_scalar_e2m3_k: return scalar_kind_t::e2m3_k;
case usearch_scalar_i8_k: return scalar_kind_t::i8_k;
+ case usearch_scalar_u8_k: return scalar_kind_t::u8_k;
case usearch_scalar_b1_k: return scalar_kind_t::b1x8_k;
default: return scalar_kind_t::unknown_k;
}
@@ -68,11 +73,16 @@ scalar_kind_t scalar_kind_to_cpp(usearch_scalar_kind_t kind) {
usearch_scalar_kind_t scalar_kind_to_c(scalar_kind_t kind) {
switch (kind) {
- case scalar_kind_t::f32_k: return usearch_scalar_f32_k;
case scalar_kind_t::f64_k: return usearch_scalar_f64_k;
- case scalar_kind_t::f16_k: return usearch_scalar_f16_k;
+ case scalar_kind_t::f32_k: return usearch_scalar_f32_k;
case scalar_kind_t::bf16_k: return usearch_scalar_bf16_k;
+ case scalar_kind_t::f16_k: return usearch_scalar_f16_k;
+ case scalar_kind_t::e5m2_k: return usearch_scalar_e5m2_k;
+ case scalar_kind_t::e4m3_k: return usearch_scalar_e4m3_k;
+ case scalar_kind_t::e3m2_k: return usearch_scalar_e3m2_k;
+ case scalar_kind_t::e2m3_k: return usearch_scalar_e2m3_k;
case scalar_kind_t::i8_k: return usearch_scalar_i8_k;
+ case scalar_kind_t::u8_k: return usearch_scalar_u8_k;
case scalar_kind_t::b1x8_k: return usearch_scalar_b1_k;
default: return usearch_scalar_unknown_k;
}
@@ -80,11 +90,12 @@ usearch_scalar_kind_t scalar_kind_to_c(scalar_kind_t kind) {
add_result_t add_(index_dense_t* index, usearch_key_t key, void const* vector, scalar_kind_t kind) {
switch (kind) {
- case scalar_kind_t::f32_k: return index->add(key, (f32_t const*)vector);
case scalar_kind_t::f64_k: return index->add(key, (f64_t const*)vector);
- case scalar_kind_t::f16_k: return index->add(key, (f16_t const*)vector);
+ case scalar_kind_t::f32_k: return index->add(key, (f32_t const*)vector);
case scalar_kind_t::bf16_k: return index->add(key, (bf16_t const*)vector);
+ case scalar_kind_t::f16_k: return index->add(key, (f16_t const*)vector);
case scalar_kind_t::i8_k: return index->add(key, (i8_t const*)vector);
+ case scalar_kind_t::u8_k: return index->add(key, (u8_t const*)vector);
case scalar_kind_t::b1x8_k: return index->add(key, (b1x8_t const*)vector);
default: return add_result_t{}.failed("Unknown scalar kind!");
}
@@ -92,11 +103,12 @@ add_result_t add_(index_dense_t* index, usearch_key_t key, void const* vector, s
std::size_t get_(index_dense_t* index, usearch_key_t key, size_t count, void* vector, scalar_kind_t kind) {
switch (kind) {
- case scalar_kind_t::f32_k: return index->get(key, (f32_t*)vector, count);
case scalar_kind_t::f64_k: return index->get(key, (f64_t*)vector, count);
- case scalar_kind_t::f16_k: return index->get(key, (f16_t*)vector, count);
+ case scalar_kind_t::f32_k: return index->get(key, (f32_t*)vector, count);
case scalar_kind_t::bf16_k: return index->get(key, (bf16_t*)vector, count);
+ case scalar_kind_t::f16_k: return index->get(key, (f16_t*)vector, count);
case scalar_kind_t::i8_k: return index->get(key, (i8_t*)vector, count);
+ case scalar_kind_t::u8_k: return index->get(key, (u8_t*)vector, count);
case scalar_kind_t::b1x8_k: return index->get(key, (b1x8_t*)vector, count);
default: return search_result_t(*index).failed("Unknown scalar kind!");
}
@@ -106,16 +118,18 @@ template
search_result_t search_(index_dense_t* index, void const* vector, scalar_kind_t kind, size_t n,
predicate_at&& predicate = predicate_at{}) {
switch (kind) {
- case scalar_kind_t::f32_k:
- return index->filtered_search((f32_t const*)vector, n, std::forward<predicate_at>(predicate));
case scalar_kind_t::f64_k:
return index->filtered_search((f64_t const*)vector, n, std::forward<predicate_at>(predicate));
- case scalar_kind_t::f16_k:
- return index->filtered_search((f16_t const*)vector, n, std::forward<predicate_at>(predicate));
+ case scalar_kind_t::f32_k:
+ return index->filtered_search((f32_t const*)vector, n, std::forward<predicate_at>(predicate));
case scalar_kind_t::bf16_k:
return index->filtered_search((bf16_t const*)vector, n, std::forward<predicate_at>(predicate));
+ case scalar_kind_t::f16_k:
+ return index->filtered_search((f16_t const*)vector, n, std::forward<predicate_at>(predicate));
case scalar_kind_t::i8_k:
return index->filtered_search((i8_t const*)vector, n, std::forward<predicate_at>(predicate));
+ case scalar_kind_t::u8_k:
+ return index->filtered_search((u8_t const*)vector, n, std::forward<predicate_at>(predicate));
case scalar_kind_t::b1x8_k:
return index->filtered_search((b1x8_t const*)vector, n, std::forward<predicate_at>(predicate));
default: return search_result_t(*index).failed("Unknown scalar kind!");
@@ -133,6 +147,10 @@ USEARCH_EXPORT char const* usearch_version(void) {
return version;
}
+USEARCH_EXPORT char const* usearch_hardware_acceleration_compiled(void) { return hardware_acceleration_compiled(); }
+
+USEARCH_EXPORT char const* usearch_hardware_acceleration_available(void) { return hardware_acceleration_available(); }
+
USEARCH_EXPORT usearch_index_t usearch_init(usearch_init_options_t* options, usearch_error_t* error) {
USEARCH_ASSERT(error && "Missing arguments");
@@ -413,7 +431,7 @@ USEARCH_EXPORT size_t usearch_search(
USEARCH_EXPORT size_t usearch_filtered_search( //
usearch_index_t index, //
void const* query, usearch_scalar_kind_t query_kind, size_t results_limit, //
- usearch_filtered_search_callback_t filter, void* filter_state, //
+ usearch_filtered_search_callback_t filter, void* filter_state, //
usearch_key_t* found_keys, usearch_distance_t* found_distances, usearch_error_t* error) {
USEARCH_ASSERT(index && query && filter && error && "Missing arguments");
diff --git a/c/test.c b/c/test.c
index dd4a07f0a..361fee1c8 100644
--- a/c/test.c
+++ b/c/test.c
@@ -1,10 +1,91 @@
+/**
+ * @file test.c
+ * @author Ash Vardanian
+ * @brief Unit tests for the pure-C ABI of USearch (`usearch.h`).
+ * @date June 25, 2023
+ *
+ * Exercises the lifecycle of `usearch_index_t` through the public C surface:
+ * index creation with every supported metric and scalar kind, `add` / `get` /
+ * `find` / `remove`, on-disk `save` / `load` / `view`, and error propagation
+ * via `usearch_error_t`. The harness is intentionally dependency-free so it
+ * can run in the same matrix as the C++ tests and on cross-compilation
+ * targets where only the C runtime is available.
+ *
+ * On startup we install a signal handler (see `install_crash_handlers`) that
+ * prints a native back-trace before re-raising, so CI logs pinpoint the
+ * faulting frame instead of stopping at a bare exit code.
+ */
#include
-#include <stdio.h> // `remove`
+#include <signal.h> // `signal`, `raise`, `SIGSEGV`
+#include <stdio.h> // `remove`
#include <stdlib.h>
+#include <string.h> // `memset`
#include
+/* Back-trace support for the C test harness. The `signal` API is standard C;
+ * the back-trace itself is taken via an OS-specific facility since C has no
+ * standard stack-introspection API. On Windows, `dbghelp.h` references types
+ * (e.g. `PSTR`) that are only defined after `windows.h`, so the two headers
+ * are separated by a blank line to keep clang-format from re-sorting them
+ * into a single alphabetized block. */
+#if defined(_WIN32)
+#include <windows.h>
+
+#include <dbghelp.h>
+#pragma comment(lib, "Dbghelp.lib")
+#elif defined(__unix__) || defined(__APPLE__)
+#include <execinfo.h>
+#include <unistd.h>
+#endif
+
#include "usearch.h"
+static void usearch_write_backtrace(int signal_number) {
+ fprintf(stderr, "\n[usearch] Fatal signal %d. Back-trace:\n", signal_number);
+#if defined(_WIN32)
+ enum { backtrace_depth_limit = 64 };
+ void* backtrace_frames[backtrace_depth_limit];
+ USHORT backtrace_depth = CaptureStackBackTrace(0, backtrace_depth_limit, backtrace_frames, NULL);
+ HANDLE current_process = GetCurrentProcess();
+ SymInitialize(current_process, NULL, TRUE);
+
+ unsigned char symbol_info_buffer[sizeof(SYMBOL_INFO) + 256 * sizeof(char)];
+ SYMBOL_INFO* symbol_info = (SYMBOL_INFO*)symbol_info_buffer;
+ symbol_info->MaxNameLen = 255;
+ symbol_info->SizeOfStruct = sizeof(SYMBOL_INFO);
+
+ for (USHORT frame_index = 0; frame_index < backtrace_depth; ++frame_index) {
+ if (SymFromAddr(current_process, (DWORD64)backtrace_frames[frame_index], 0, symbol_info))
+ fprintf(stderr, " #%2u %s + 0x%llx\n", (unsigned)frame_index, symbol_info->Name,
+ (unsigned long long)((DWORD64)backtrace_frames[frame_index] - symbol_info->Address));
+ else
+ fprintf(stderr, " #%2u %p\n", (unsigned)frame_index, backtrace_frames[frame_index]);
+ }
+#elif defined(__unix__) || defined(__APPLE__)
+ enum { backtrace_depth_limit = 64 };
+ void* backtrace_frames[backtrace_depth_limit];
+ int backtrace_depth = backtrace(backtrace_frames, backtrace_depth_limit);
+ backtrace_symbols_fd(backtrace_frames, backtrace_depth, STDERR_FILENO);
+#else
+ (void)signal_number;
+ fprintf(stderr, " <no backtrace backend available on this platform>\n");
+#endif
+ fflush(stderr);
+}
+
+static void usearch_crash_handler(int signal_number) {
+ usearch_write_backtrace(signal_number);
+ /* Restore the default disposition and re-raise so the shell / CI sees the true exit status. */
+ signal(signal_number, SIG_DFL);
+ raise(signal_number);
+}
+
+static void install_crash_handlers(void) {
+ int const fatal_signals[] = {SIGSEGV, SIGABRT, SIGILL, SIGFPE};
+ for (unsigned signal_index = 0; signal_index < sizeof(fatal_signals) / sizeof(fatal_signals[0]); ++signal_index)
+ signal(fatal_signals[signal_index], &usearch_crash_handler);
+}
+
void expect(bool must_be_true, char const* message) {
if (must_be_true)
return;
@@ -368,6 +449,7 @@ void test_view(size_t const collection_size, size_t const dimensions) {
}
int main(int argc, char const* argv[]) {
+ install_crash_handlers();
printf("Running tests...\n");
printf("USearch version: %s\n", usearch_version());
diff --git a/c/usearch.h b/c/usearch.h
index 7921c192c..49ec2a51e 100644
--- a/c/usearch.h
+++ b/c/usearch.h
@@ -53,12 +53,17 @@ USEARCH_EXPORT typedef enum usearch_metric_kind_t {
USEARCH_EXPORT typedef enum usearch_scalar_kind_t {
usearch_scalar_unknown_k = 0,
- usearch_scalar_f32_k = 1,
usearch_scalar_f64_k = 2,
+ usearch_scalar_f32_k = 1,
+ usearch_scalar_bf16_k = 6,
usearch_scalar_f16_k = 3,
+ usearch_scalar_e5m2_k = 7,
+ usearch_scalar_e4m3_k = 8,
+ usearch_scalar_e3m2_k = 11,
+ usearch_scalar_e2m3_k = 10,
usearch_scalar_i8_k = 4,
+ usearch_scalar_u8_k = 9,
usearch_scalar_b1_k = 5,
- usearch_scalar_bf16_k = 6,
} usearch_scalar_kind_t;
USEARCH_EXPORT typedef struct usearch_init_options_t {
@@ -109,8 +114,6 @@ USEARCH_EXPORT typedef struct usearch_init_options_t {
bool multi;
} usearch_init_options_t;
-extern int goFilteredSearchCallback(usearch_key_t, void*);
-
USEARCH_EXPORT typedef int (*usearch_filtered_search_callback_t)(usearch_key_t, void*);
/**
@@ -119,6 +122,18 @@ USEARCH_EXPORT typedef int (*usearch_filtered_search_callback_t)(usearch_key_t,
*/
USEARCH_EXPORT char const* usearch_version(void);
+/**
+ * @brief Retrieves a list of hardware capabilities in this precompiled binary.
+ * @return A comma-separated string with names of CPU features.
+ */
+USEARCH_EXPORT char const* usearch_hardware_acceleration_compiled(void);
+
+/**
+ * @brief Retrieves a list of hardware capabilities supported by the current machine.
+ * @return A comma-separated string with names of CPU features.
+ */
+USEARCH_EXPORT char const* usearch_hardware_acceleration_available(void);
+
/**
* @brief Initializes a new instance of the index.
* @param options Pointer to the `usearch_init_options_t` structure containing initialization options.
@@ -395,7 +410,7 @@ USEARCH_EXPORT size_t usearch_search( //
USEARCH_EXPORT size_t usearch_filtered_search( //
usearch_index_t index, //
void const* query_vector, usearch_scalar_kind_t query_kind, size_t count, //
- usearch_filtered_search_callback_t filter, void* filter_state, //
+ usearch_filtered_search_callback_t filter, void* filter_state, //
usearch_key_t* keys, usearch_distance_t* distances, usearch_error_t* error);
/**
@@ -484,6 +499,8 @@ USEARCH_EXPORT void usearch_exact_search( //
*/
USEARCH_EXPORT void usearch_clear(usearch_index_t index, usearch_error_t* error);
+extern int goFilteredSearchCallback(usearch_key_t, void*);
+
#ifdef __cplusplus
}
#endif
diff --git a/conanfile.py b/conanfile.py
index b13fe4874..48f205991 100644
--- a/conanfile.py
+++ b/conanfile.py
@@ -10,7 +10,7 @@ class USearchConan(ConanFile):
version = "2.24.0"
license = "Apache-2.0"
description = "Smaller & Faster Single-File Vector Search Engine from Unum"
- homepage = "https://github.com/unum-cloud/usearch"
+ homepage = "https://github.com/unum-cloud/USearch"
topics = ("search", "vector", "simd")
settings = "os", "arch", "compiler", "build_type"
url = "https://github.com/conan-io/conan-center-index"
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 8301de6cb..7cea50f1e 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -7,6 +7,10 @@ if (USEARCH_BUILD_TEST_CPP)
target_include_directories(test_cpp PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/../stringzilla/include)
+ # Export the dynamic symbol table so `backtrace_symbols` / `std::stacktrace`
+ # can resolve function names when the in-test crash handler fires.
+ set_target_properties(test_cpp PROPERTIES ENABLE_EXPORTS ON)
+
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
target_compile_options(test_cpp PRIVATE -Wno-vla -Wno-unused-function -Wno-cast-function-type)
endif ()
diff --git a/cpp/README.md b/cpp/README.md
index 74c692507..5b8f18d3e 100644
--- a/cpp/README.md
+++ b/cpp/README.md
@@ -6,7 +6,7 @@ To use in a C++ project, copy the `include/usearch/*` headers into your project.
Alternatively, fetch it with CMake:
```cmake
-FetchContent_Declare(usearch GIT_REPOSITORY https://github.com/unum-cloud/usearch.git)
+FetchContent_Declare(usearch GIT_REPOSITORY https://github.com/unum-cloud/USearch.git)
FetchContent_MakeAvailable(usearch)
```
@@ -43,9 +43,9 @@ int main(int argc, char **argv) {
Here we:
-- define a metric of kind [`metric_kind_t::l2sq_k`](https://unum-cloud.github.io/usearch/cpp/reference.html#_CPPv413metric_kind_t),
-- to be applied to [`scalar_kind_t::f32_k`](https://unum-cloud.github.io/usearch/cpp/reference.html#_CPPv413scalar_kind_t) floating-point vectors,
-- instantiate an [`index_dense_t`](https://unum-cloud.github.io/usearch/cpp/reference.html#_CPPv4I00EN4unum7usearch14index_dense_gtE) index.
+- define a metric of kind [`metric_kind_t::l2sq_k`](https://unum-cloud.github.io/USearch/cpp/reference.html#_CPPv413metric_kind_t),
+- to be applied to [`scalar_kind_t::f32_k`](https://unum-cloud.github.io/USearch/cpp/reference.html#_CPPv413scalar_kind_t) floating-point vectors,
+- instantiate an [`index_dense_t`](https://unum-cloud.github.io/USearch/cpp/reference.html#_CPPv4I00EN4unum7usearch14index_dense_gtE) index.
The `add` is thread-safe for concurrent index construction.
It also has an overload for different vector types, casting them under the hood.
@@ -174,7 +174,7 @@ In its high-level interface, USearch supports a variety of metrics, including th
- `metric_haversine_gt` for "Haversine" or "Great Circle" distance between coordinates used in GIS applications.
- `metric_divergence_gt` for the "Jensen Shannon" similarity between probability distributions.
-In reality, for most common types, one of the [SimSIMD](https://github.com/ashvardanian/SimSIMD) backends will be triggered, providing hardware-acceleration for most common CPUs.
+In reality, for most common types, one of the [NumKong](https://github.com/ashvardanian/numkong) backends will be triggered, providing hardware-acceleration for most common CPUs.
If you need a different metric, you can implement it yourself and wrap it into a `metric_punned_t`, which is our alternative to the `std::function`.
Unlike the `std::function`, it is a trivial type, which is important for performance.
diff --git a/cpp/bench.cpp b/cpp/bench.cpp
index 740616aad..c04edab2c 100644
--- a/cpp/bench.cpp
+++ b/cpp/bench.cpp
@@ -26,16 +26,18 @@
#include <sys/stat.h> // `stat`
-#include
#include
#include
-#include <iostream> // `std::cerr`
-#include <numeric> // `std::iota`
-#include <stdexcept> // `std::invalid_argument`
-#include <string> // `std::to_string`
-#include <thread> // `std::thread::hardware_concurrency()`
-#include <variant> // `std::monostate`
-#include <vector>
+
+#include // ?
+#include <iostream> // `std::cerr`
+#include <numeric> // `std::iota`
+#include <stdexcept> // `std::invalid_argument`
+#include <string> // `std::to_string`
+#include <thread> // `std::thread::hardware_concurrency()`
+#include <unordered_map> // `std::unordered_map`
+#include <variant> // `std::monostate`
+#include <vector> // `std::vector`
#include <clipp.h> // Command Line Interface
#if USEARCH_USE_OPENMP
@@ -112,9 +114,10 @@ struct alignas(32) persisted_matrix_gt {
if (fstat(file_descriptor, &stat_vectors) == -1)
throw std::invalid_argument("Couldn't obtain file stats");
raw_length = stat_vectors.st_size;
- raw_handle = (std::uint8_t*)mmap(NULL, raw_length, PROT_READ, MAP_PRIVATE, file_descriptor, 0);
- if (raw_handle == nullptr)
+ auto* result = mmap(NULL, raw_length, PROT_READ, MAP_PRIVATE, file_descriptor, 0);
+ if (result == MAP_FAILED)
throw std::invalid_argument("Couldn't memory-map the file");
+ raw_handle = (std::uint8_t*)result;
std::memcpy(&rows, raw_handle, sizeof(rows));
std::memcpy(&cols, raw_handle + sizeof(rows), sizeof(cols));
scalars = (scalar_t*)(raw_handle + sizeof(rows) + sizeof(cols));
@@ -201,14 +204,20 @@ struct persisted_dataset_gt {
for (std::size_t i = 0; i < ids_matrix.rows; ++i)
vector_ids_[i] = static_cast<default_key_t>(*ids_matrix.row(i));
}
+
+ // When custom IDs are loaded and self-search is active, populate
+ // neighborhoods_iota_ with the actual IDs so recall comparison works.
+ if (has_vector_ids() && !queries_.scalars && !neighborhoods_.scalars) {
+ for (std::size_t i = 0; i < neighborhoods_iota_.size(); ++i)
+ neighborhoods_iota_[i] = static_cast(vector_ids_[i]);
+ }
}
bool search_itself() const noexcept { return vectors_count() && !queries_.rows; }
bool has_vector_ids() const noexcept { return !vector_ids_.empty(); }
default_key_t vector_id(std::size_t i) const noexcept {
- return has_vector_ids() ? vector_ids_[i + vectors_to_skip_]
- : static_cast<default_key_t>(i + vectors_to_skip_);
+ return has_vector_ids() ? vector_ids_[i + vectors_to_skip_] : static_cast<default_key_t>(i + vectors_to_skip_);
}
std::size_t dimensions() const noexcept { return vectors_.cols; }
@@ -224,9 +233,7 @@ struct persisted_dataset_gt {
std::size_t vectors_count() const noexcept {
return vectors_to_take_ ? vectors_to_take_ : (vectors_.rows - vectors_to_skip_);
}
- matrix_slice_gt vectors_view() const noexcept {
- return {vector(vectors_to_skip_), vectors_count(), dimensions()};
- }
+ matrix_slice_gt vectors_view() const noexcept { return {vector(0), vectors_count(), dimensions()}; }
};
template //
@@ -288,7 +295,7 @@ struct running_stats_printer_t {
std::size_t count = progress.load();
timestamp_t time = std::chrono::high_resolution_clock::now();
std::size_t duration = std::chrono::duration_cast(time - start_time).count();
- float vectors_per_second = count * 1e9 / duration;
+ float vectors_per_second = static_cast<float>(count * 1e9 / duration);
std::printf("\r\33[2K100 %% completed, %.0f vectors/s\n", vectors_per_second);
}
@@ -314,7 +321,7 @@ struct running_stats_printer_t {
timestamp_t time_new = std::chrono::high_resolution_clock::now();
std::size_t duration =
std::chrono::duration_cast(time_new - last_printed_time).count();
- float vectors_per_second = count_new * 1e9 / duration;
+ float vectors_per_second = static_cast<float>(count_new * 1e9 / duration);
std::printf("\r%3.3f%% [%.*s%*s] %.0f vectors/s, finished %zu/%zu", percentage * 100.f, left_pad, bars_k,
right_pad, "", vectors_per_second, progress, total);
@@ -377,9 +384,8 @@ void search_many( //
}
template <typename dataset_at, typename index_at> //
-static void single_shot(dataset_at& dataset, index_at& index, bool construct = true) {
+static void single_shot(dataset_at& dataset, index_at& index, bool construct = true, bool bench_join = false) {
using distance_t = typename index_at::distance_t;
- constexpr default_key_t missing_key = std::numeric_limits<default_key_t>::max();
std::printf("\n");
std::printf("------------\n");
@@ -391,6 +397,9 @@ static void single_shot(dataset_at& dataset, index_at& index, bool construct = t
index_many(index, dataset.vectors_count(), ids.data(), dataset.vector(0), dataset.dimensions());
}
+ std::size_t mem = index.memory_usage();
+ std::printf("Memory usage: %.2f GB\n", mem / (1024.0 * 1024.0 * 1024.0));
+
// Perform search, evaluate speed
std::vector<default_key_t> found_neighbors(dataset.queries_count() * dataset.neighborhood_size());
std::vector<distance_t> found_distances(dataset.queries_count() * dataset.neighborhood_size());
@@ -409,40 +418,38 @@ static void single_shot(dataset_at& dataset, index_at& index, bool construct = t
std::printf("Recall@1 %.2f %%\n", recall_at_1 * 100.f / dataset.queries_count());
std::printf("Recall %.2f %%\n", recall_full * 100.f / dataset.queries_count());
- // Perform joins
- std::vector<default_key_t> man_to_woman(dataset.vectors_count());
- std::vector<default_key_t> woman_to_man(dataset.vectors_count());
- std::size_t join_attempts = 0;
- {
+ if (bench_join) {
+ // Perform joins using maps to support non-contiguous IDs
+ std::unordered_map<default_key_t, default_key_t> man_to_woman;
+ std::unordered_map<default_key_t, default_key_t> woman_to_man;
+ std::size_t join_attempts = 0;
+
index_at& men = index;
index_at women = index.copy();
- std::fill(man_to_woman.begin(), man_to_woman.end(), missing_key);
- std::fill(woman_to_man.begin(), woman_to_man.end(), missing_key);
- {
- executor_default_t executor(index.limits().threads());
- running_stats_printer_t printer{1, "Join"};
- join_result_t result = join( //
- men, women, index_join_config_t{executor.size()}, //
- man_to_woman.data(), woman_to_man.data(), //
- executor, [&](std::size_t progress, std::size_t total) {
- if (progress % 1000 == 0)
- printer.print(progress, total);
- return true;
- });
- // Refresh once again to show 100% completion
- printer.print();
- join_attempts = result.visited_members;
- }
- }
- // Evaluate join quality
- std::size_t recall_join = 0, unmatched_count = 0;
- for (std::size_t i = 0; i != index.size(); ++i) {
- recall_join += man_to_woman[i] == static_cast(i);
- unmatched_count += man_to_woman[i] == missing_key;
+
+ executor_default_t executor(index.limits().threads());
+ running_stats_printer_t printer{1, "Join"};
+ join_result_t result = join( //
+ men, women, index_join_config_t{executor.size()}, //
+ man_to_woman, woman_to_man, //
+ executor, [&](std::size_t progress, std::size_t total) {
+ if (progress % 1000 == 0)
+ printer.print(progress, total);
+ return true;
+ });
+ // Refresh once again to show 100% completion
+ printer.print();
+ join_attempts = result.visited_members;
+
+ // Evaluate join quality
+ std::size_t recall_join = 0;
+ for (auto const& [man, woman] : man_to_woman)
+ recall_join += (man == woman);
+ std::size_t unmatched_count = dataset.vectors_count() - man_to_woman.size();
+ std::printf("Recall Joins %.2f %%\n", recall_join * 100.f / index.size());
+ std::printf("Unmatched %.2f %% (%zu items)\n", unmatched_count * 100.f / index.size(), unmatched_count);
+ std::printf("Proposals %.2f / man (%zu total)\n", join_attempts * 1.f / index.size(), join_attempts);
}
- std::printf("Recall Joins %.2f %%\n", recall_join * 100.f / index.size());
- std::printf("Unmatched %.2f %% (%zu items)\n", unmatched_count * 100.f / index.size(), unmatched_count);
- std::printf("Proposals %.2f / man (%zu total)\n", join_attempts * 1.f / index.size(), join_attempts);
std::printf("------------\n");
std::printf("\n");
@@ -473,7 +480,7 @@ void handler(int sig) {
name = "";
}
DWORD bytes_written;
- WriteFile(STDERR_FILENO, name, std::strlen(name), &bytes_written, NULL);
+ WriteFile(STDERR_FILENO, name, static_cast<DWORD>(std::strlen(name)), &bytes_written, NULL);
WriteFile(STDERR_FILENO, "\n", 1, &bytes_written, NULL);
}
free(symbol);
@@ -508,49 +515,24 @@ struct args_t {
bool help = false;
bool big = false;
+ bool join = false;
+ bool view = false;
- bool quantize_bf16 = false;
- bool quantize_f16 = false;
- bool quantize_i8 = false;
- bool quantize_b1 = false;
-
- bool metric_ip = false;
- bool metric_l2 = false;
- bool metric_cos = false;
- bool metric_haversine = false;
- bool metric_divergence = false;
- bool metric_hamming = false;
- bool metric_tanimoto = false;
- bool metric_sorensen = false;
+ std::string dtype_str = "f32";
+ std::string metric_str = "ip";
metric_kind_t metric() const noexcept {
- if (metric_l2)
- return metric_kind_t::l2sq_k;
- if (metric_cos)
- return metric_kind_t::cos_k;
- if (metric_haversine)
- return metric_kind_t::haversine_k;
- if (metric_divergence)
- return metric_kind_t::divergence_k;
- if (metric_hamming)
- return metric_kind_t::hamming_k;
- if (metric_tanimoto)
- return metric_kind_t::tanimoto_k;
- if (metric_sorensen)
- return metric_kind_t::sorensen_k;
- return metric_kind_t::ip_k;
+ auto parsed = metric_from_name(metric_str.c_str(), metric_str.size());
+ if (!parsed)
+ return metric_kind_t::ip_k;
+ return parsed.result;
}
scalar_kind_t quantization() const noexcept {
- if (quantize_bf16)
- return scalar_kind_t::bf16_k;
- if (quantize_f16)
- return scalar_kind_t::f16_k;
- if (quantize_i8)
- return scalar_kind_t::i8_k;
- if (quantize_b1)
- return scalar_kind_t::b1x8_k;
- return scalar_kind_t::f32_k;
+ auto parsed = scalar_kind_from_name(dtype_str.c_str(), dtype_str.size());
+ if (!parsed)
+ return scalar_kind_t::f32_k;
+ return parsed.result;
}
};
@@ -569,14 +551,16 @@ void run_punned(dataset_at& dataset, args_t const& args, index_dense_config_t co
std::printf("-- Hardware acceleration: %s\n", index.metric().isa_name());
std::printf("Will benchmark in-memory\n");
- single_shot(dataset, index, true);
+ single_shot(dataset, index, true, args.join);
index.save(args.path_output.c_str());
+ if (!args.view)
+ return;
std::printf("Will benchmark an on-disk view\n");
index_at index_view = index.fork();
index_view.view(args.path_output.c_str());
- single_shot(dataset, index_view, false);
+ single_shot(dataset, index_view, false, args.join);
}
template //
@@ -586,14 +570,16 @@ void run_typed(dataset_at& dataset, args_t const& args, index_config_t config, i
index.reserve(limits);
std::printf("Will benchmark in-memory\n");
- single_shot(dataset, index, true);
+ single_shot(dataset, index, true, args.join);
index.save(args.path_output.c_str());
+ if (!args.view)
+ return;
std::printf("Will benchmark an on-disk view\n");
index_at index_view = index.fork();
index_view.view(args.path_output.c_str());
- single_shot(dataset, index_view, false);
+ single_shot(dataset, index_view, false, args.join);
}
template void bench_with_args(args_t const& args) {
@@ -636,9 +622,9 @@ int main(int argc, char** argv) {
auto args = args_t{};
auto cli = ( //
(option("--vectors") & value("path", args.path_vectors))
- .doc(".[fhbd]bin, .i8bin, .f32bin file path to construct the index"),
+ .doc(".[fhbd]bin, .i8bin, .u8bin, .f32bin file path to construct the index"),
(option("--queries") & value("path", args.path_queries))
- .doc(".[fhbd]bin, .i8bin, .f32bin file path to query the index"),
+ .doc(".[fhbd]bin, .i8bin, .u8bin, .f32bin file path to query the index"),
(option("--neighbors") & value("path", args.path_neighbors)).doc(".ibin, .i32bin file path with ground truth"),
(option("--ids") & value("path", args.path_ids)).doc(".i32bin file path with vector IDs (optional)"),
(option("-o", "--output") & value("path", args.path_output)).doc(".usearch output file path"),
@@ -649,20 +635,13 @@ int main(int argc, char** argv) {
(option("--expansion-search") & value("integer", args.expansion_search)).doc("Affects search depth"),
(option("--rows-skip") & value("integer", args.vectors_to_skip)).doc("Number of vectors to skip"),
(option("--rows-take") & value("integer", args.vectors_to_take)).doc("Number of vectors to take"),
- ( //
- option("-bf16", "--bf16quant").set(args.quantize_bf16).doc("Enable `bf16_t` quantization") |
- option("-f16", "--f16quant").set(args.quantize_f16).doc("Enable `f16_t` quantization") |
- option("-i8", "--i8quant").set(args.quantize_i8).doc("Enable `i8_t` quantization") |
- option("-b1", "--b1quant").set(args.quantize_b1).doc("Enable `b1x8_t` quantization")),
- ( //
- option("--ip").set(args.metric_ip).doc("Choose Inner Product metric") |
- option("--l2sq").set(args.metric_l2).doc("Choose L2 Euclidean metric") |
- option("--cos").set(args.metric_cos).doc("Choose Angular metric") |
- option("--hamming").set(args.metric_hamming).doc("Choose Hamming metric") |
- option("--tanimoto").set(args.metric_tanimoto).doc("Choose Tanimoto metric") |
- option("--sorensen").set(args.metric_sorensen).doc("Choose Sorensen metric") |
- option("--haversine").set(args.metric_haversine).doc("Choose Haversine metric")),
- option("-h", "--help").set(args.help).doc("Print this help information on this tool and exit"));
+ (option("--dtype") & value("type", args.dtype_str))
+ .doc("Quantization type: f64, f32, bf16, f16, e5m2, e4m3, e3m2, e2m3, i8, u8, b1"),
+ (option("--metric") & value("name", args.metric_str))
+ .doc("Distance metric: ip, l2sq, cos, hamming, tanimoto, sorensen, haversine"),
+ option("-h", "--help").set(args.help).doc("Print this help information on this tool and exit"),
+ option("--join").set(args.join).doc("Also benchmark joins"),
+ option("--view").set(args.view).doc("Also benchmark on-disk view"));
if (!parse(argc, argv, cli)) {
std::cerr << make_man_page(cli, argv[0]);
@@ -678,10 +657,12 @@ int main(int argc, char** argv) {
// to better estimate statistics between tasks batches, without having to recreate
// the threads.
omp_set_dynamic(true);
- omp_set_num_threads(args.threads);
+ omp_set_num_threads(static_cast<int>(args.threads));
std::printf("- OpenMP threads: %d\n", omp_get_max_threads());
#endif
+ std::printf("- Hardware acceleration compiled: %s\n", hardware_acceleration_compiled());
+ std::printf("- Hardware acceleration available: %s\n", hardware_acceleration_available());
std::printf("- Dataset: \n");
std::printf("-- Base vectors path: %s\n", args.path_vectors.c_str());
std::printf("-- Query vectors path: %s\n", args.path_queries.c_str());
@@ -693,18 +674,18 @@ int main(int argc, char** argv) {
return stack.find(needle, stack.size() - needle.size()) != std::string_view::npos;
};
- if (ends_with(args.path_vectors, ".fbin"))
- bench_with_args<float>(args);
- else if (ends_with(args.path_vectors, ".dbin"))
+ if (ends_with(args.path_vectors, ".dbin"))
bench_with_args<double>(args);
+ else if (ends_with(args.path_vectors, ".fbin") || ends_with(args.path_vectors, ".f32bin"))
+ bench_with_args<float>(args);
else if (ends_with(args.path_vectors, ".hbin"))
bench_with_args<f16_t>(args);
- else if (ends_with(args.path_vectors, ".bbin"))
- bench_with_args<b1x8_t>(args);
else if (ends_with(args.path_vectors, ".i8bin"))
bench_with_args<i8_t>(args);
- else if (ends_with(args.path_vectors, ".f32bin"))
- bench_with_args<float>(args);
+ else if (ends_with(args.path_vectors, ".u8bin"))
+ bench_with_args<u8_t>(args);
+ else if (ends_with(args.path_vectors, ".bbin"))
+ bench_with_args<b1x8_t>(args);
else
throw std::runtime_error("Unknown input file path");
diff --git a/cpp/test.cpp b/cpp/test.cpp
index a125f05ad..91cff0b05 100644
--- a/cpp/test.cpp
+++ b/cpp/test.cpp
@@ -12,14 +12,45 @@
* - 128-bit `uuid_t` keys and `enum slot64_t : std::uint64_t` make most sense for
* for database users, implementing portable, concurrent systems.
*/
+#include <cassert> // `assert`
+#include <cmath> // `std::abs`
+#include <csignal> // `std::signal`, `SIGSEGV`, ...
+#include <cstdio> // `std::fprintf`
+#include <cstdlib> // `std::_Exit`
+
#include <algorithm> // `std::shuffle`
-#include <cassert> // `assert`
-#include <cmath> // `std::abs`
#include <random> // `std::default_random_engine`
#include <exception> // `std::terminate`
#include <unordered_map> // `std::unordered_map`
#include <vector> // `std::vector`
+// Back-trace support. Prefer the C++23 `<stacktrace>` library when the
+// toolchain + stdlib expose it (`__cpp_lib_stacktrace`); otherwise fall back
+// to the OS-native facility so that unit-test crashes in CI log something
+// useful beyond a bare exit code.
+#if defined(__has_include)
+#if __has_include(<stacktrace>)
+#include <stacktrace>
+#endif
+#endif
+#if defined(__cpp_lib_stacktrace) && __cpp_lib_stacktrace >= 202011L
+#define USEARCH_HAS_STD_STACKTRACE 1
+#else
+#define USEARCH_HAS_STD_STACKTRACE 0
+#if defined(_WIN32)
+// `windows.h` must precede `dbghelp.h` — the latter uses `PSTR` and friends
+// that are only defined after `windows.h`. The blank line keeps clang-format
+// from re-sorting the two headers into a single alphabetized block.
+#include <windows.h>
+
+#include <dbghelp.h>
+#pragma comment(lib, "Dbghelp.lib")
+#else
+#include <execinfo.h>
+#include <unistd.h>
+#endif
+#endif
+
#define SZ_USE_X86_AVX512 0 // Sanitizers hate AVX512
#include <stringzilla/stringzilla.hpp> // Levenshtein distance implementation
@@ -54,7 +85,7 @@ void __expect_eq(value_at a, value_at b, char const* file, int line, char const*
enum slot32_t : std::uint32_t {};
template <> struct unum::usearch::hash_gt<slot32_t> : public unum::usearch::hash_gt<std::uint32_t> {};
template <> struct unum::usearch::default_free_value_gt<slot32_t> {
- static slot32_t value() noexcept { return static_cast<slot32_t>(std::numeric_limits<std::uint32_t>::max()); }
+ static slot32_t value() noexcept { return static_cast<slot32_t>((std::numeric_limits<std::uint32_t>::max)()); }
};
/*
@@ -166,8 +197,8 @@ void test_uint40() {
}
// Test min and max functions
- expect_eq(uint40_t::min(), uint40_t(0u));
- expect_eq(uint40_t::max(), uint40_t(max_uint40_k));
+ expect_eq((uint40_t::min)(), uint40_t(0u));
+ expect_eq((uint40_t::max)(), uint40_t(max_uint40_k));
// Test copy and move semantics
for (std::uint64_t input_u64 : test_numbers) {
@@ -664,7 +695,7 @@ void test_cosine(std::size_t collection_size, std::size_t dimensions) {
vector_of_vectors_t vector_of_vectors(collection_size);
for (auto& vector : vector_of_vectors) {
vector.resize(dimensions);
- std::generate(vector.begin(), vector.end(), [=] { return float(std::rand()) / float(INT_MAX); });
+ std::generate(vector.begin(), vector.end(), [=] { return float(std::rand()) / float(RAND_MAX); });
}
struct metric_t {
@@ -1127,20 +1158,20 @@ void test_filtered_search() {
{
auto predicate = [](index_dense_t::key_t key) { return key != 0; };
auto results = index.filtered_search(vector_of_vectors[0].data(), 10, predicate);
- expect_eq(10, results.size()); // ! Should not contain 0
+ expect_eq(10u, results.size()); // ! Should not contain 0
for (std::size_t i = 0; i != results.size(); ++i)
expect(0 != results[i].member.key);
}
{
auto predicate = [](index_dense_t::key_t) { return false; };
auto results = index.filtered_search(vector_of_vectors[0].data(), 10, predicate);
- expect_eq(0, results.size()); // ! Should not contain 0
+ expect_eq(0u, results.size()); // ! Should not contain 0
}
{
auto predicate = [](index_dense_t::key_t key) { return key == 10; };
auto results = index.filtered_search(vector_of_vectors[0].data(), 10, predicate);
- expect_eq(1, results.size()); // ! Should not contain 0
- expect_eq(10, results[0].member.key);
+ expect_eq(1u, results.size()); // ! Should not contain 0
+ expect_eq(index_dense_t::key_t(10), results[0].member.key);
}
}
@@ -1179,13 +1210,73 @@ void test_isolate() {
}
}
+static void usearch_write_backtrace(int signal_number) {
+ std::fprintf(stderr, "\n[usearch] Fatal signal %d. Back-trace:\n", signal_number);
+#if USEARCH_HAS_STD_STACKTRACE
+ // C++23 `std::stacktrace` covers every platform the library can reach.
+ auto const current_trace = std::stacktrace::current();
+ std::size_t frame_index = 0;
+ for (auto const& frame : current_trace) {
+ std::fprintf(stderr, " #%2zu %s\n", frame_index, std::to_string(frame).c_str());
+ ++frame_index;
+ }
+#elif defined(_WIN32)
+ // Fallback for MSVC stdlibs without `<stacktrace>`: DbgHelp API.
+ constexpr USHORT backtrace_depth_limit = 64;
+ void* backtrace_frames[backtrace_depth_limit];
+ USHORT backtrace_depth = CaptureStackBackTrace(0, backtrace_depth_limit, backtrace_frames, nullptr);
+ HANDLE current_process = GetCurrentProcess();
+ SymInitialize(current_process, nullptr, TRUE);
+
+ unsigned char symbol_info_buffer[sizeof(SYMBOL_INFO) + 256 * sizeof(char)];
+ SYMBOL_INFO* symbol_info = reinterpret_cast(symbol_info_buffer);
+ symbol_info->MaxNameLen = 255;
+ symbol_info->SizeOfStruct = sizeof(SYMBOL_INFO);
+
+ for (USHORT frame_index = 0; frame_index < backtrace_depth; ++frame_index) {
+ if (SymFromAddr(current_process, reinterpret_cast(backtrace_frames[frame_index]), 0, symbol_info))
+ std::fprintf(stderr, " #%2u %s + 0x%llx\n", static_cast(frame_index), symbol_info->Name,
+ static_cast(reinterpret_cast(backtrace_frames[frame_index]) -
+ symbol_info->Address));
+ else
+ std::fprintf(stderr, " #%2u %p\n", static_cast(frame_index), backtrace_frames[frame_index]);
+ }
+#else
+ // Fallback for POSIX stdlibs without `<stacktrace>`: `<execinfo.h>`.
+ constexpr int backtrace_depth_limit = 64;
+ void* backtrace_frames[backtrace_depth_limit];
+ int const backtrace_depth = backtrace(backtrace_frames, backtrace_depth_limit);
+ backtrace_symbols_fd(backtrace_frames, backtrace_depth, STDERR_FILENO);
+#endif
+ std::fflush(stderr);
+}
+
+static void usearch_crash_handler(int signal_number) {
+ usearch_write_backtrace(signal_number);
+ // Restore the default disposition and re-raise so the shell / CI sees the true exit status.
+ std::signal(signal_number, SIG_DFL);
+ std::raise(signal_number);
+}
+
+static void install_crash_handlers() {
+ int const fatal_signals[] = {SIGSEGV, SIGABRT, SIGILL, SIGFPE};
+ for (int signal_number : fatal_signals)
+ std::signal(signal_number, &usearch_crash_handler);
+}
+
int main(int, char**) {
- test_uint40();
- test_cosine(10, 10);
+ install_crash_handlers();
+
+ std::printf("Hardware acceleration compiled: %s\n", hardware_acceleration_compiled());
+ std::printf("Hardware acceleration available: %s\n", hardware_acceleration_available());
// Non-default floating-point types may result in many compilation & rounding issues.
- test_cosine(10, 10);
+ test_uint40();
+ test_cosine(10, 10);
test_cosine(10, 10);
+ test_cosine(10, 10);
+ test_cosine(10, 10);
+ test_cosine(10, 10);
// Test plugins, like K-Means clustering.
{
@@ -1195,7 +1286,7 @@ int main(int, char**) {
std::vector vectors(vectors_count * dimensions), centroids(centroids_count * dimensions);
matrix_slice_gt vectors_slice(vectors.data(), dimensions, vectors_count);
matrix_slice_gt centroids_slice(centroids.data(), dimensions, centroids_count);
- std::generate(vectors.begin(), vectors.end(), [] { return float(std::rand()) / float(INT_MAX); });
+ std::generate(vectors.begin(), vectors.end(), [] { return float(std::rand()) / float(RAND_MAX); });
std::vector assignments(vectors_count);
std::vector distances(vectors_count);
auto clustering_result = clustering(vectors_slice, centroids_slice, {assignments.data(), assignments.size()},
@@ -1209,9 +1300,11 @@ int main(int, char**) {
for (std::size_t dataset_count : {10, 100})
for (std::size_t queries_count : {1, 10})
for (std::size_t wanted_count : {1, 5}) {
- test_exact_search(dataset_count, queries_count, wanted_count);
- test_exact_search(dataset_count, queries_count, wanted_count);
+ test_exact_search(dataset_count, queries_count, wanted_count);
test_exact_search(dataset_count, queries_count, wanted_count);
+ test_exact_search(dataset_count, queries_count, wanted_count);
+ test_exact_search(dataset_count, queries_count, wanted_count);
+ test_exact_search(dataset_count, queries_count, wanted_count);
}
// Make sure the initializers and the algorithms can work with inadequately small values.
@@ -1235,12 +1328,16 @@ int main(int, char**) {
std::printf("Testing common cases\n");
for (std::size_t collection_size : {10, 500})
for (std::size_t dimensions : {97, 256}) {
- std::printf("- Indexing %zu vectors with cos: \n", collection_size);
- test_cosine(collection_size, dimensions);
- std::printf("- Indexing %zu vectors with cos: \n", collection_size);
- test_cosine(collection_size, dimensions);
+ std::printf("- Indexing %zu vectors with cos: \n", collection_size);
+ test_cosine(collection_size, dimensions);
+ std::printf("- Indexing %zu vectors with cos: \n", collection_size);
+ test_cosine(collection_size, dimensions);
std::printf("- Indexing %zu vectors with cos: \n", collection_size);
test_cosine(collection_size, dimensions);
+ std::printf("- Indexing %zu vectors with cos: \n", collection_size);
+ test_cosine(collection_size, dimensions);
+ std::printf("- Indexing %zu vectors with cos: \n", collection_size);
+ test_cosine(collection_size, dimensions);
}
// Test with binary vectors
diff --git a/csharp/nuget/nuget-package.props b/csharp/nuget/nuget-package.props
index 296eaa047..5776741ed 100644
--- a/csharp/nuget/nuget-package.props
+++ b/csharp/nuget/nuget-package.props
@@ -9,8 +9,8 @@
Cloud.Unum.USearch
LICENSE
- https://github.com/unum-cloud/usearch
- https://github.com/unum-cloud/usearch
+ https://github.com/unum-cloud/USearch
+ https://github.com/unum-cloud/USearch
true
unum.png
diff --git a/csharp/src/Cloud.Unum.USearch/NativeMethods.cs b/csharp/src/Cloud.Unum.USearch/NativeMethods.cs
index 1ae42b206..83bca522b 100644
--- a/csharp/src/Cloud.Unum.USearch/NativeMethods.cs
+++ b/csharp/src/Cloud.Unum.USearch/NativeMethods.cs
@@ -15,6 +15,15 @@ internal static class NativeMethods
{
private const string LibraryName = "libusearch_c";
+ [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl)]
+ public static extern void_ptr_t usearch_version();
+
+ [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl)]
+ public static extern void_ptr_t usearch_hardware_acceleration_compiled();
+
+ [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl)]
+ public static extern void_ptr_t usearch_hardware_acceleration_available();
+
[DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl)]
public static extern usearch_index_t usearch_init(ref IndexOptions options, out usearch_error_t error);
@@ -89,4 +98,7 @@ out usearch_error_t error
[DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl)]
public static extern size_t usearch_rename(usearch_index_t index, usearch_key_t key_from, usearch_key_t key_to, out usearch_error_t error);
+
+ [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl)]
+ public static extern void_ptr_t usearch_hardware_acceleration(usearch_index_t index, out usearch_error_t error);
}
diff --git a/csharp/src/Cloud.Unum.USearch/USearchIndex.cs b/csharp/src/Cloud.Unum.USearch/USearchIndex.cs
index 22d3ab671..007a596af 100644
--- a/csharp/src/Cloud.Unum.USearch/USearchIndex.cs
+++ b/csharp/src/Cloud.Unum.USearch/USearchIndex.cs
@@ -719,8 +719,51 @@ protected virtual void Dispose(bool disposing)
}
+ ///
+ /// Returns the SIMD capability used by this index.
+ ///
+ public string HardwareAcceleration()
+ {
+ var ptr = NativeMethods.usearch_hardware_acceleration(_index, out var error);
+ HandleError(error);
+ return Marshal.PtrToStringAnsi(ptr) ?? "serial";
+ }
+
///
/// Destructor for the USearchIndex class.
///
~USearchIndex() => this.Dispose(false);
}
+
+///
+/// System-wide hardware capability queries — not tied to any index instance.
+///
+public static class USearchCapabilities
+{
+ ///
+ /// Returns the USearch library version string.
+ ///
+ public static string Version()
+ {
+ var ptr = NativeMethods.usearch_version();
+ return Marshal.PtrToStringAnsi(ptr) ?? "unknown";
+ }
+
+ ///
+ /// Returns a comma-separated list of ISAs compiled into this binary.
+ ///
+ public static string HardwareAccelerationCompiled()
+ {
+ var ptr = NativeMethods.usearch_hardware_acceleration_compiled();
+ return Marshal.PtrToStringAnsi(ptr) ?? "serial";
+ }
+
+ ///
+ /// Returns a comma-separated list of ISAs available at runtime.
+ ///
+ public static string HardwareAccelerationAvailable()
+ {
+ var ptr = NativeMethods.usearch_hardware_acceleration_available();
+ return Marshal.PtrToStringAnsi(ptr) ?? "serial";
+ }
+}
diff --git a/csharp/src/Cloud.Unum.USearch/USearchTypes.cs b/csharp/src/Cloud.Unum.USearch/USearchTypes.cs
index f66c4a70c..76b0d5f0d 100644
--- a/csharp/src/Cloud.Unum.USearch/USearchTypes.cs
+++ b/csharp/src/Cloud.Unum.USearch/USearchTypes.cs
@@ -74,26 +74,56 @@ public enum ScalarKind : uint
///
Unknown = 0,
+ ///
+ /// 64-bit floating point.
+ ///
+ Float64 = 2,
+
///
/// 32-bit floating point.
///
Float32 = 1,
///
- /// 64-bit floating point.
+ /// Brain floating point (16-bit).
///
- Float64 = 2,
+ BFloat16 = 6,
///
/// 16-bit floating point.
///
Float16 = 3,
+ ///
+ /// FP8 E5M2 (1 sign + 5 exponent + 2 mantissa).
+ ///
+ E5M2 = 7,
+
+ ///
+ /// FP8 E4M3 (1 sign + 4 exponent + 3 mantissa).
+ ///
+ E4M3 = 8,
+
+ ///
+ /// FP6 E3M2 (1 sign + 3 exponent + 2 mantissa).
+ ///
+ E3M2 = 11,
+
+ ///
+ /// FP6 E2M3 (1 sign + 2 exponent + 3 mantissa).
+ ///
+ E2M3 = 10,
+
///
/// 8-bit integer.
///
Int8 = 4,
+ ///
+ /// 8-bit unsigned integer.
+ ///
+ U8 = 9,
+
///
/// 1-bit binary.
///
diff --git a/docs/_static/custom.js b/docs/_static/custom.js
index 9c97a22d7..926b8243c 100644
--- a/docs/_static/custom.js
+++ b/docs/_static/custom.js
@@ -1,5 +1,5 @@
$(document).ready(function () {
- const github_logo = ``
diff --git a/fp16 b/fp16
deleted file mode 160000
index 0a92994d7..000000000
--- a/fp16
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 0a92994d729ff76a58f692d3028ca1b64b145d91
diff --git a/golang/README.md b/golang/README.md
index 764641c98..6f7859b93 100644
--- a/golang/README.md
+++ b/golang/README.md
@@ -8,7 +8,7 @@ Download and install the Debian package from the latest release.
Substitute ``, ``, and `` with your settings.
```sh
-wget https://github.com/unum-cloud/usearch/releases/download//usearch_linux__.deb
+wget https://github.com/unum-cloud/USearch/releases/download//usearch_linux__.deb
dpkg -i usearch_linux__.deb
```
@@ -27,7 +27,7 @@ Download and unpack the zip archive from the latest release.
Move the USearch library and the include file to their respective folders.
```sh
-wget https://github.com/unum-cloud/usearch/releases/download//usearch_macos__.zip
+wget https://github.com/unum-cloud/USearch/releases/download//usearch_macos__.zip
unzip usearch_macos__.zip
sudo mv libusearch_c.dylib /usr/local/lib && sudo mv usearch.h /usr/local/include
```
diff --git a/golang/go.mod b/golang/go.mod
index a8cb11744..e936b5d5c 100644
--- a/golang/go.mod
+++ b/golang/go.mod
@@ -1,3 +1,3 @@
module github.com/unum-cloud/usearch/golang
-go 1.19
+go 1.22
diff --git a/golang/lib.go b/golang/lib.go
index db4879dda..41d4e561a 100644
--- a/golang/lib.go
+++ b/golang/lib.go
@@ -119,18 +119,29 @@ func (m Metric) CValue() C.usearch_metric_kind_t {
// Different quantization types offer different trade-offs between memory usage and precision.
type Quantization uint8
-// Different quantization kinds supported by the USearch library.
+// Different quantization kinds supported by the USearch library,
+// ordered by descending dynamic range.
const (
+ // F64 uses 64-bit double precision floating point
+ F64 Quantization = iota
// F32 uses 32-bit floating point (standard precision)
- F32 Quantization = iota
+ F32
// BF16 uses brain floating-point format (16-bit)
BF16
// F16 uses half-precision floating point (16-bit)
F16
- // F64 uses 64-bit double precision floating point
- F64
+ // E5M2 uses 8-bit floating point (1 sign + 5 exponent + 2 mantissa)
+ E5M2
+ // E4M3 uses 8-bit floating point (1 sign + 4 exponent + 3 mantissa)
+ E4M3
+ // E3M2 uses 6-bit floating point (1 sign + 3 exponent + 2 mantissa)
+ E3M2
+ // E2M3 uses 6-bit floating point (1 sign + 2 exponent + 3 mantissa)
+ E2M3
// I8 uses 8-bit signed integers (quantized)
I8
+ // U8 uses 8-bit unsigned integers
+ U8
// B1 uses binary representation (1-bit per dimension)
B1
)
@@ -138,16 +149,26 @@ const (
// String returns the string representation of the Quantization.
func (q Quantization) String() string {
switch q {
+ case F64:
+ return "F64"
+ case F32:
+ return "F32"
case BF16:
return "BF16"
case F16:
return "F16"
- case F32:
- return "F32"
- case F64:
- return "F64"
+ case E5M2:
+ return "E5M2"
+ case E4M3:
+ return "E4M3"
+ case E3M2:
+ return "E3M2"
+ case E2M3:
+ return "E2M3"
case I8:
return "I8"
+ case U8:
+ return "U8"
case B1:
return "B1"
default:
@@ -157,18 +178,28 @@ func (q Quantization) String() string {
func (q Quantization) CValue() C.usearch_scalar_kind_t {
switch q {
- case F16:
- return C.usearch_scalar_f16_k
- case F32:
- return C.usearch_scalar_f32_k
case F64:
return C.usearch_scalar_f64_k
+ case F32:
+ return C.usearch_scalar_f32_k
+ case BF16:
+ return C.usearch_scalar_bf16_k
+ case F16:
+ return C.usearch_scalar_f16_k
+ case E5M2:
+ return C.usearch_scalar_e5m2_k
+ case E4M3:
+ return C.usearch_scalar_e4m3_k
+ case E3M2:
+ return C.usearch_scalar_e3m2_k
+ case E2M3:
+ return C.usearch_scalar_e2m3_k
case I8:
return C.usearch_scalar_i8_k
+ case U8:
+ return C.usearch_scalar_u8_k
case B1:
return C.usearch_scalar_b1_k
- case BF16:
- return C.usearch_scalar_bf16_k
default:
return C.usearch_scalar_unknown_k
}
@@ -224,6 +255,21 @@ type Index struct {
config IndexConfig
}
+// Version returns the USearch library version string.
+func Version() string {
+ return C.GoString(C.usearch_version())
+}
+
+// HardwareAccelerationCompiled returns a comma-separated list of ISAs compiled into the binary.
+func HardwareAccelerationCompiled() string {
+ return C.GoString(C.usearch_hardware_acceleration_compiled())
+}
+
+// HardwareAccelerationAvailable returns a comma-separated list of ISAs available at runtime.
+func HardwareAccelerationAvailable() string {
+ return C.GoString(C.usearch_hardware_acceleration_available())
+}
+
// NewIndex creates a new approximate nearest neighbor index with the specified configuration.
//
// The index must be destroyed with Destroy() when no longer needed.
@@ -271,11 +317,6 @@ func NewIndex(conf IndexConfig) (index *Index, err error) {
return index, nil
}
-// Version returns the USearch library version string.
-func Version() string {
- return C.GoString(C.usearch_version())
-}
-
// GetHandle returns the C index handel.
func (index *Index) GetHandle() C.usearch_index_t {
return index.handle
@@ -1015,6 +1056,123 @@ func (index *Index) FilteredSearchI8(query []int8, limit uint, handler *Filtered
return keys, distances, nil
}
+// AddU8 adds a uint8 vector to the index.
+// The vector must have exactly Dimensions() elements.
+//
+// This is a convenience method for indexes using U8 quantization.
+func (index *Index) AddU8(key Key, vec []uint8) error {
+ if index.handle == nil {
+ panic("index is uninitialized")
+ }
+ if len(vec) == 0 {
+ return errors.New("vector cannot be empty")
+ }
+ if uint(len(vec)) != index.config.Dimensions {
+ return fmt.Errorf("vector dimension mismatch: got %d, expected %d", len(vec), index.config.Dimensions)
+ }
+ var errorMessage *C.char
+ C.usearch_add(index.handle, (C.usearch_key_t)(key), unsafe.Pointer(&vec[0]), C.usearch_scalar_u8_k, (*C.usearch_error_t)(&errorMessage))
+ runtime.KeepAlive(vec)
+ if errorMessage != nil {
+ return errors.New(C.GoString(errorMessage))
+ }
+ return nil
+}
+
+// SearchU8 searches for nearest neighbors using a uint8 query vector.
+// The query must have exactly Dimensions() elements.
+//
+// This is a convenience method for indexes using U8 quantization.
+func (index *Index) SearchU8(query []uint8, limit uint) (keys []Key, distances []float32, err error) {
+ if index.handle == nil {
+ panic("index is uninitialized")
+ }
+ if len(query) == 0 {
+ return nil, nil, errors.New("query vector cannot be empty")
+ }
+ if uint(len(query)) != index.config.Dimensions {
+ return nil, nil, fmt.Errorf("query dimension mismatch: got %d, expected %d", len(query), index.config.Dimensions)
+ }
+ if limit == 0 {
+ return []Key{}, []float32{}, nil
+ }
+ keys = make([]Key, limit)
+ distances = make([]float32, limit)
+ var errorMessage *C.char
+ resultCount := uint(C.usearch_search(index.handle, unsafe.Pointer(&query[0]), C.usearch_scalar_u8_k, (C.size_t)(limit), (*C.usearch_key_t)(&keys[0]), (*C.usearch_distance_t)(&distances[0]), (*C.usearch_error_t)(&errorMessage)))
+ runtime.KeepAlive(query)
+ runtime.KeepAlive(keys)
+ runtime.KeepAlive(distances)
+ if errorMessage != nil {
+ return nil, nil, errors.New(C.GoString(errorMessage))
+ }
+ keys = keys[:resultCount]
+ distances = distances[:resultCount]
+ return keys, distances, nil
+}
+
+// FilteredSearchU8 searches for nearest neighbors using a uint8 query vector with filtering.
+func (index *Index) FilteredSearchU8(query []uint8, limit uint, handler *FilteredSearchHandler) (keys []Key, distances []float32, err error) {
+ if index.handle == nil {
+ panic("index is uninitialized")
+ }
+
+ if len(query) == 0 {
+ return nil, nil, errors.New("query vector cannot be empty")
+ }
+ if uint(len(query)) != index.config.Dimensions {
+ return nil, nil, fmt.Errorf("query dimension mismatch: got %d, expected %d", len(query), index.config.Dimensions)
+ }
+ if handler == nil {
+ return nil, nil, errors.New("filtered search handler cannot be nil")
+ }
+ if limit == 0 {
+ return []Key{}, []float32{}, nil
+ }
+
+ keys = make([]Key, limit)
+ distances = make([]float32, limit)
+ var errorMessage *C.char
+ resultCount := uint(C.usearch_filtered_search(index.handle, unsafe.Pointer(&query[0]), C.usearch_scalar_u8_k, (C.size_t)(limit),
+ (C.usearch_filtered_search_callback_t)(C.goFilteredSearchCallback), unsafe.Pointer(handler),
+ (*C.usearch_key_t)(&keys[0]), (*C.usearch_distance_t)(&distances[0]), (*C.usearch_error_t)(&errorMessage)))
+ runtime.KeepAlive(query)
+ runtime.KeepAlive(keys)
+ runtime.KeepAlive(distances)
+ runtime.KeepAlive(handler)
+ if errorMessage != nil {
+ return nil, nil, errors.New(C.GoString(errorMessage))
+ }
+
+ keys = keys[:resultCount]
+ distances = distances[:resultCount]
+ return keys, distances, nil
+}
+
+// GetU8 retrieves a uint8 vector by key from the index.
+// Returns nil if the key is not found.
+func (index *Index) GetU8(key Key, maxCount uint) (vectors []uint8, err error) {
+ if index.handle == nil {
+ panic("index is uninitialized")
+ }
+
+ if maxCount == 0 {
+ return nil, nil
+ }
+
+ vectors = make([]uint8, index.config.Dimensions*maxCount)
+ var errorMessage *C.char
+ found := uint(C.usearch_get(index.handle, (C.usearch_key_t)(key), (C.size_t)(maxCount), unsafe.Pointer(&vectors[0]), C.usearch_scalar_u8_k, (*C.usearch_error_t)(&errorMessage)))
+ runtime.KeepAlive(vectors)
+ if errorMessage != nil {
+ return nil, errors.New(C.GoString(errorMessage))
+ }
+ if found == 0 {
+ return nil, nil
+ }
+ return vectors, nil
+}
+
// DistanceI8 computes the distance between two int8 vectors.
//
// Example:
diff --git a/golang/lib_test.go b/golang/lib_test.go
index 4f9b72439..12eff69c5 100644
--- a/golang/lib_test.go
+++ b/golang/lib_test.go
@@ -45,6 +45,14 @@ func generateTestVectorI8(dimensions uint) []int8 {
return vector
}
+func generateTestVectorU8(dimensions uint) []uint8 {
+ vector := make([]uint8, dimensions)
+ for i := uint(0); i < dimensions; i++ {
+ vector[i] = uint8((i % 255) + 1)
+ }
+ return vector
+}
+
func populateIndex(t *testing.T, index *Index, vectorCount int) [][]float32 {
vectors := make([][]float32, vectorCount)
err := index.Reserve(uint(vectorCount))
@@ -657,6 +665,65 @@ func TestQuantizationTypes(t *testing.T) {
t.Fatalf("FilteredSearchI8 returned incorrect results")
}
})
+
+ t.Run("U8 operations", func(t *testing.T) {
+ index := createTestIndex(t, 32, U8)
+ defer func() {
+ if err := index.Destroy(); err != nil {
+ t.Errorf("Failed to destroy index: %v", err)
+ }
+ }()
+
+ if err := index.Reserve(1); err != nil {
+ t.Fatalf("Failed to reserve capacity: %v", err)
+ }
+ vector := generateTestVectorU8(32)
+ err := index.AddU8(1, vector)
+ if err != nil {
+ t.Fatalf("U8 Add failed: %v", err)
+ }
+
+ keys, _, err := index.SearchU8(vector, 1)
+ if err != nil {
+ t.Fatalf("U8 Search failed: %v", err)
+ }
+
+ if len(keys) == 0 || keys[0] != 1 {
+ t.Fatalf("U8 search results incorrect")
+ }
+
+ // Test FilteredSearchU8
+ handler := &FilteredSearchHandler{
+ Callback: func(key Key, handler *FilteredSearchHandler) int {
+ if key%2 == 0 {
+ return 1
+ }
+ return 0
+ },
+ Data: int64(1),
+ }
+
+ keys, _, err = index.FilteredSearchU8(vector, 1, handler)
+ if err != nil {
+ t.Fatalf("FilteredSearchU8 failed: %v", err)
+ }
+
+ if len(keys) > 0 {
+ t.Fatalf("FilteredSearchU8 returned incorrect results")
+ }
+
+ // Test GetU8
+ retrieved, err := index.GetU8(1, 1)
+ if err != nil {
+ t.Fatalf("U8 Get failed: %v", err)
+ }
+ if retrieved == nil {
+ t.Fatalf("U8 Get returned nil")
+ }
+ if len(retrieved) != 32 {
+ t.Fatalf("U8 Get returned wrong dimensions: got %d, expected 32", len(retrieved))
+ }
+ })
}
func TestUnsafeOperations(t *testing.T) {
diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp
index 21a212d99..24ddbaac3 100644
--- a/include/usearch/index.hpp
+++ b/include/usearch/index.hpp
@@ -597,6 +597,125 @@ template > class bitset_gt {
using bitset_t = bitset_gt<>;
+/**
+ * @brief Cache-line-padded striped spin-lock array for concurrent graph mutations.
+ * Maps node slots to lock stripes via Fibonacci hashing, with each stripe
+ * occupying its own cache line to eliminate false sharing.
+ * The number of stripes is proportional to `threads * connectivity`, not
+ * graph size, keeping the lock array comfortably within L2/L3 cache.
+ */
+template , std::size_t cache_line_ak = 128> //
+class striped_locks_gt {
+ using allocator_t = allocator_at;
+ using byte_t = typename allocator_t::value_type;
+ static_assert(sizeof(byte_t) == 1, "Allocator must allocate separate addressable bytes");
+
+ static constexpr std::uint64_t fibonacci_k = 0x9E3779B97F4A7C15ull;
+
+ using atomic_flag_t = std::atomic;
+ struct alignas(cache_line_ak) padded_lock_t {
+ atomic_flag_t flag{0};
+ char padding_[cache_line_ak - sizeof(atomic_flag_t)];
+ };
+ static_assert(sizeof(padded_lock_t) == cache_line_ak, "Lock stripe must be exactly one cache line");
+
+ // `padded_lock_t` is `alignas(cache_line_ak)` (128 B by default) which
+ // exceeds what a plain allocator guarantees (typically 16 B on x86-64).
+ // Rather than demanding an over-aligned allocator, we over-allocate and
+ // keep a pointer to the aligned sub-region โ `raw_` is what we hand back
+ // to the allocator, `stripes_` is the aligned view used for reads/writes.
+ byte_t* raw_{};
+ std::size_t raw_bytes_{};
+ padded_lock_t* stripes_{};
+ std::size_t count_{};
+ unsigned shift_{};
+
+ inline std::size_t stripe_for_(std::size_t slot) const noexcept {
+ return static_cast((static_cast(slot) * fibonacci_k) >> shift_);
+ }
+
+ public:
+ striped_locks_gt() noexcept {}
+ ~striped_locks_gt() noexcept { reset(); }
+
+ explicit operator bool() const noexcept { return stripes_; }
+
+ void reset() noexcept {
+ if (stripes_)
+ for (std::size_t i = 0; i < count_; i++)
+ stripes_[i].~padded_lock_t();
+ if (raw_)
+ allocator_t{}.deallocate(raw_, raw_bytes_);
+ raw_ = nullptr;
+ raw_bytes_ = 0;
+ stripes_ = nullptr;
+ count_ = 0;
+ shift_ = 64;
+ }
+
+ striped_locks_gt(std::size_t threads, std::size_t connectivity) noexcept {
+ std::size_t desired = threads * connectivity * 4;
+ if (desired < 256)
+ desired = 256;
+ count_ = ceil2(desired);
+ shift_ = 64;
+ for (std::size_t n = count_; n > 1; n >>= 1)
+ shift_--;
+ // Request one extra stripe's worth of slack so we can always land on a
+ // `cache_line_ak`-aligned address inside the allocation, regardless of
+ // what the underlying allocator returns.
+ constexpr std::size_t alignment_k = alignof(padded_lock_t);
+ raw_bytes_ = count_ * sizeof(padded_lock_t) + alignment_k;
+ raw_ = allocator_t{}.allocate(raw_bytes_);
+ if (!raw_) {
+ raw_bytes_ = 0;
+ count_ = 0;
+ shift_ = 64;
+ return;
+ }
+ auto raw_address = reinterpret_cast(raw_);
+ auto aligned_address = (raw_address + alignment_k - 1) & ~(static_cast(alignment_k) - 1);
+ stripes_ = reinterpret_cast(aligned_address);
+ for (std::size_t i = 0; i < count_; i++)
+ new (&stripes_[i]) padded_lock_t();
+ }
+
+ striped_locks_gt(striped_locks_gt&& other) noexcept {
+ raw_ = exchange(other.raw_, (byte_t*)nullptr);
+ raw_bytes_ = exchange(other.raw_bytes_, std::size_t{0});
+ stripes_ = exchange(other.stripes_, nullptr);
+ count_ = exchange(other.count_, std::size_t{0});
+ shift_ = exchange(other.shift_, unsigned{64});
+ }
+
+ striped_locks_gt& operator=(striped_locks_gt&& other) noexcept {
+ std::swap(raw_, other.raw_);
+ std::swap(raw_bytes_, other.raw_bytes_);
+ std::swap(stripes_, other.stripes_);
+ std::swap(count_, other.count_);
+ std::swap(shift_, other.shift_);
+ return *this;
+ }
+
+ striped_locks_gt(striped_locks_gt const&) = delete;
+ striped_locks_gt& operator=(striped_locks_gt const&) = delete;
+
+ inline bool atomic_set(std::size_t i) noexcept {
+ return stripes_[stripe_for_(i)].flag.exchange(1, std::memory_order_acquire);
+ }
+
+ inline void atomic_reset(std::size_t i) noexcept {
+ stripes_[stripe_for_(i)].flag.store(0, std::memory_order_release);
+ }
+
+ inline void lock(std::size_t i) noexcept {
+ while (atomic_set(i))
+ std::this_thread::yield();
+ }
+
+ inline void unlock(std::size_t i) noexcept { atomic_reset(i); }
+};
+
/**
* @brief Similar to `std::priority_queue`, but allows raw access to underlying
* memory, in case you want to shuffle it or sort. Good for collections
@@ -728,10 +847,12 @@ class max_heap_gt {
*/
usearch_profiled_m bool reserve(std::size_t new_capacity) noexcept {
usearch_profile_name_m(max_heap_reserve);
- if (new_capacity < capacity_)
+ if (new_capacity <= capacity_)
return true;
new_capacity = ceil2(new_capacity);
+ if (new_capacity == 0)
+ return false;
new_capacity = (std::max)(new_capacity, (std::max)(capacity_ * 2u, 16u));
auto allocator = allocator_t{};
auto new_elements = allocator.allocate(new_capacity);
@@ -894,10 +1015,12 @@ class sorted_buffer_gt {
inline void clear() noexcept { size_ = 0; }
bool reserve(std::size_t new_capacity) noexcept {
- if (new_capacity < capacity_)
+ if (new_capacity <= capacity_)
return true;
new_capacity = ceil2(new_capacity);
+ if (new_capacity == 0)
+ return false;
new_capacity = (std::max)(new_capacity, (std::max)(capacity_ * 2u, 16u));
auto allocator = allocator_t{};
auto new_elements = allocator.allocate(new_capacity);
@@ -1013,8 +1136,10 @@ class usearch_pack_m uint40_t {
return result;
}
- inline static uint40_t max() noexcept { return uint40_t{}.broadcast(0xFF); }
- inline static uint40_t min() noexcept { return uint40_t{}.broadcast(0); }
+ /* Parenthesized declarator keeps MSVC's preprocessor from expanding
+ * `max` / `min` against ``'s `max(a,b)` / `min(a,b)` macros. */
+ inline static uint40_t(max)() noexcept { return uint40_t{}.broadcast(0xFF); }
+ inline static uint40_t(min)() noexcept { return uint40_t{}.broadcast(0); }
inline bool operator==(uint40_t const& other) const noexcept { return std::memcmp(octets, other.octets, 5) == 0; }
inline bool operator!=(uint40_t const& other) const noexcept { return !(*this == other); }
@@ -1046,7 +1171,7 @@ template struct default_free_value_gt {
template ::value>::type* = nullptr>
static sfinae_element_at value() noexcept {
- return std::numeric_limits::max();
+ return (std::numeric_limits::max)();
}
template ::value>::type* = nullptr>
@@ -1056,7 +1181,7 @@ template struct default_free_value_gt {
};
template <> struct default_free_value_gt {
- static uint40_t value() noexcept { return uint40_t::max(); }
+ static uint40_t value() noexcept { return (uint40_t::max)(); }
};
template element_at default_free_value() { return default_free_value_gt::value(); }
@@ -2031,8 +2156,8 @@ class index_gt {
friend inline vector_key_t get_key(member_iterator_gt const& it) noexcept { return it.key(); }
// clang-format off
- member_iterator_gt operator++(int) noexcept { return member_iterator_gt(index_, static_cast(static_cast(slot_) + 1)); }
- member_iterator_gt operator--(int) noexcept { return member_iterator_gt(index_, static_cast(static_cast(slot_) - 1)); }
+ member_iterator_gt operator++(int) noexcept { member_iterator_gt old(index_, slot_); ++(*this); return old; }
+ member_iterator_gt operator--(int) noexcept { member_iterator_gt old(index_, slot_); --(*this); return old; }
member_iterator_gt operator+(difference_type d) noexcept { return member_iterator_gt(index_, static_cast(static_cast(slot_) + d)); }
member_iterator_gt operator-(difference_type d) noexcept { return member_iterator_gt(index_, static_cast(static_cast(slot_) - d)); }
member_iterator_gt& operator++() noexcept { slot_ = static_cast(static_cast(slot_) + 1); return *this; }
@@ -2086,7 +2211,7 @@ class index_gt {
*/
static constexpr std::size_t node_head_bytes_() { return sizeof(vector_key_t) + sizeof(level_t); }
- using nodes_mutexes_t = bitset_gt;
+ using nodes_mutexes_t = striped_locks_gt;
using visits_hash_set_t = growing_hash_set_gt, dynamic_allocator_t>;
@@ -2191,7 +2316,7 @@ class index_gt {
misaligned_store(tape_ + shift(i - removed_count), slot);
}
}
- misaligned_store(tape_, old_count - removed_count);
+ misaligned_store(tape_, static_cast(old_count - removed_count));
return removed_count;
}
};
@@ -2509,7 +2634,8 @@ class index_gt {
return true;
}
- nodes_mutexes_t new_mutexes(limits.members);
+ std::size_t connectivity_max = (std::max)(config_.connectivity_base, config_.connectivity);
+ nodes_mutexes_t new_mutexes(limits.threads(), connectivity_max);
buffer_gt new_nodes(limits.members);
buffer_gt new_contexts(limits.threads());
if (!new_nodes || !new_contexts || !new_mutexes)
@@ -2518,10 +2644,6 @@ class index_gt {
// Move the nodes info, and deallocate previous buffers.
if (nodes_)
std::memcpy(new_nodes.data(), nodes_.data(), sizeof(node_t) * size());
-
- // Pre-reserve the capacity for `top_for_refine`, which always contains at most one more
- // element than the connectivity factors.
- std::size_t connectivity_max = (std::max)(config_.connectivity_base, config_.connectivity);
for (std::size_t i = 0; i != new_contexts.size(); ++i)
if (!new_contexts[i].top_for_refine.reserve(connectivity_max + 1))
return false;
@@ -2569,7 +2691,7 @@ class index_gt {
member_cref_t member;
distance_t distance;
- inline match_t() noexcept : member({nullptr, 0}), distance(std::numeric_limits::max()) {}
+ inline match_t() noexcept : member({nullptr, 0}), distance((std::numeric_limits::max)()) {}
inline match_t(member_cref_t member, distance_t distance) noexcept : member(member), distance(distance) {}
@@ -2721,7 +2843,7 @@ class index_gt {
keys[i] = vector_key_t{};
distances[i] = std::numeric_limits::has_signaling_NaN
? std::numeric_limits::signaling_NaN()
- : std::numeric_limits::max();
+ : (std::numeric_limits::max)();
}
return initialized_count;
}
@@ -2788,6 +2910,10 @@ class index_gt {
callback_at&& callback = callback_at{}, //
prefetch_at&& prefetch = prefetch_at{}) usearch_noexcept_m {
+ // Zero expansion is meaningless, fall back to default
+ if (!config.expansion)
+ config.expansion = default_expansion_add();
+
add_result_t result;
if (is_immutable())
return result.failed("Can't add to an immutable index");
@@ -3103,6 +3229,9 @@ class index_gt {
predicate_at&& predicate = predicate_at{}, //
prefetch_at&& prefetch = prefetch_at{}) const noexcept {
+ if (!config.expansion)
+ config.expansion = default_expansion_search();
+
context_t& context = contexts_[config.thread];
cluster_result_t result;
if (!nodes_count_)
@@ -3181,7 +3310,7 @@ class index_gt {
continue;
++result.nodes;
- result.edges += neighbors_(node, level).size();
+ result.edges += neighbors_(node, static_cast(level)).size();
result.allocated_bytes += node_head_bytes_() + neighbors_bytes;
}
@@ -3803,12 +3932,11 @@ class index_gt {
struct node_lock_t {
nodes_mutexes_t& mutexes;
std::size_t slot;
- inline ~node_lock_t() noexcept { mutexes.atomic_reset(slot); }
+ inline ~node_lock_t() noexcept { mutexes.unlock(slot); }
};
inline node_lock_t node_lock_(std::size_t slot) const noexcept {
- while (nodes_mutexes_.atomic_set(slot))
- ;
+ nodes_mutexes_.lock(slot);
return {nodes_mutexes_, slot};
}
@@ -3816,18 +3944,17 @@ class index_gt {
nodes_mutexes_t& mutexes;
std::size_t slot;
inline ~optional_node_lock_t() noexcept {
- if (slot != std::numeric_limits::max())
- mutexes.atomic_reset(slot);
+ if (slot != (std::numeric_limits::max)())
+ mutexes.unlock(slot);
}
};
inline optional_node_lock_t optional_node_lock_(std::size_t slot, bool condition) const noexcept {
if (condition) {
- while (nodes_mutexes_.atomic_set(slot))
- ;
+ nodes_mutexes_.lock(slot);
return {nodes_mutexes_, slot};
} else {
- return {nodes_mutexes_, std::numeric_limits::max()};
+ return {nodes_mutexes_, (std::numeric_limits::max)()};
}
}
@@ -3835,8 +3962,8 @@ class index_gt {
nodes_mutexes_t& mutexes;
std::size_t slot;
inline ~node_conditional_lock_t() noexcept {
- if (slot != std::numeric_limits::max())
- mutexes.atomic_reset(slot);
+ if (slot != (std::numeric_limits::max)())
+ mutexes.unlock(slot);
}
};
@@ -3844,10 +3971,10 @@ class index_gt {
bool& failed_to_acquire) const noexcept {
if (!condition) {
failed_to_acquire = false;
- return {nodes_mutexes_, std::numeric_limits::max()};
+ return {nodes_mutexes_, (std::numeric_limits::max)()};
}
failed_to_acquire = nodes_mutexes_.atomic_set(slot);
- return {nodes_mutexes_, failed_to_acquire ? std::numeric_limits::max() : slot};
+ return {nodes_mutexes_, failed_to_acquire ? (std::numeric_limits::max)() : slot};
}
template
@@ -3898,8 +4025,12 @@ class index_gt {
usearch_assert_m(close_slot != new_slot, "Self-loops are impossible");
usearch_assert_m(level <= close_node.level(), "Linking to missing level");
- // If `new_slot` is already present in the neighboring connections of `close_slot`
- // then no need to modify any connections or run the heuristics.
+ // Skip to prevent duplicate entries in the neighbor list.
+ if (std::find_if(close_header.begin(), close_header.end(),
+ [new_slot](compressed_slot_t slot) { return slot == new_slot; }) != close_header.end()) {
+ continue;
+ }
+
if (close_header.size() < connectivity_max) {
close_header.push_back(new_slot);
continue;
@@ -3914,7 +4045,7 @@ class index_gt {
// Export the results:
close_header.clear();
candidates_view_t top_view = refine_(metric, connectivity_max, top_for_refine, context,
- context.computed_distances_in_reverse_refines);
+ context.computed_distances_in_reverse_refines, new_slot, value);
usearch_assert_m(top_view.size(), "This would lead to isolated nodes");
for (std::size_t idx = 0; idx != top_view.size(); idx++)
close_header.push_back(top_view[idx].slot);
@@ -3962,7 +4093,9 @@ class index_gt {
std::size_t progress) noexcept
: index_(index), neighbors_(neighbors), visits_(visits), current_(progress) {}
candidates_iterator_t operator++(int) noexcept {
- return candidates_iterator_t(index_, neighbors_, visits_, current_ + 1).skip_missing();
+ candidates_iterator_t old(index_, neighbors_, visits_, current_);
+ ++(*this);
+ return old;
}
candidates_iterator_t& operator++() noexcept {
++current_;
@@ -4053,6 +4186,10 @@ class index_gt {
// At the very least we are going to explore the starting node and its neighbors
if (!visits.reserve(config_.connectivity_base + 1u))
return false;
+ if (!top.reserve(top_limit))
+ return false;
+ if (!next.reserve(top_limit))
+ return false;
// Optional prefetching
if (!is_dummy())
@@ -4130,6 +4267,10 @@ class index_gt {
// At the very least we are going to explore the starting node and its neighbors
if (!visits.reserve(config_.connectivity_base + 1u))
return false;
+ if (!top.reserve(top_limit))
+ return false;
+ if (!next.reserve(top_limit))
+ return false;
// Optional prefetching
if (!is_dummy())
@@ -4298,16 +4439,50 @@ class index_gt {
}
}
+ /// @brief Helper for `refine_()`: computes inter-neighbor distance, substituting
+ /// @p override_value when either slot matches @p override_slot.
+ /// The `std::nullptr_t` overload below avoids instantiating the override
+ /// branch when no override is provided, keeping the code C++11 compatible.
+ template
+ distance_t inter_neighbor_distance_( //
+ candidate_t const& candidate, candidate_t const& submitted, //
+ compressed_slot_t override_slot, override_value_at override_value, //
+ metric_at&& metric, context_t& context) const noexcept {
+ if (candidate.slot == override_slot)
+ return context.measure(override_value, citerator_at(submitted.slot), metric);
+ else if (submitted.slot == override_slot)
+ return context.measure(override_value, citerator_at(candidate.slot), metric);
+ else
+ return context.measure(citerator_at(candidate.slot), citerator_at(submitted.slot), metric);
+ }
+
+ template
+ distance_t inter_neighbor_distance_( //
+ candidate_t const& candidate, candidate_t const& submitted, //
+ compressed_slot_t, std::nullptr_t, //
+ metric_at&& metric, context_t& context) const noexcept {
+ return context.measure(citerator_at(candidate.slot), citerator_at(submitted.slot), metric);
+ }
+
/**
* @brief This algorithm from the original paper implements a heuristic,
* that massively reduces the number of connections a point has,
* to keep only the neighbors, that are from each other.
+ *
+ * @param[in] override_slot Optional slot whose stored vector is stale (e.g. during update,
+ * where the callback has not yet committed the new vector).
+ * When set, inter-result distances involving this slot will use
+ * @p override_value instead of reading from `citerator_at()`.
+ * @param[in] override_value The up-to-date vector for @p override_slot. Only used when
+ * @p override_value_at is not `std::nullptr_t`.
*/
- template
+ template
candidates_view_t refine_( //
metric_at&& metric, //
std::size_t needed, top_candidates_t& top, context_t& context, //
- std::size_t& refines_counter) const noexcept {
+ std::size_t& refines_counter, //
+ compressed_slot_t override_slot = (std::numeric_limits::max)(),
+ override_value_at override_value = {}) const noexcept {
// Avoid expensive computation, if the set is already small
candidate_t* top_data = top.data();
@@ -4326,10 +4501,8 @@ class index_gt {
std::size_t idx = 0;
for (; idx < submitted_count; idx++) {
candidate_t submitted = top_data[idx];
- distance_t inter_result_dist = context.measure( //
- citerator_at(candidate.slot), //
- citerator_at(submitted.slot), //
- metric);
+ distance_t inter_result_dist = inter_neighbor_distance_( //
+ candidate, submitted, override_slot, override_value, metric, context);
if (inter_result_dist < candidate.distance) {
good = false;
break;
@@ -4420,7 +4593,7 @@ static join_result_t join( //
return result.failed("Can't join with itself, consider copying");
if (config.max_proposals == 0)
- config.max_proposals = std::log(men.size()) + executor.size();
+ config.max_proposals = static_cast(std::log(men.size())) + executor.size();
using proposals_count_t = std::uint16_t;
config.max_proposals = (std::min)(men.size(), config.max_proposals);
diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp
index 3094494d7..bf23f8424 100644
--- a/include/usearch/index_dense.hpp
+++ b/include/usearch/index_dense.hpp
@@ -220,7 +220,7 @@ inline scalar_kind_t convert_pre_2_10_scalar_kind(scalar_kind_t scalar_kind) noe
case 4: return scalar_kind_t::f64_k;
case 5: return scalar_kind_t::f32_k;
case 6: return scalar_kind_t::f16_k;
- case 7: return scalar_kind_t::f8_k;
+ case 7: return scalar_kind_t::e5m2_k;
case 8: return scalar_kind_t::u64_k;
case 9: return scalar_kind_t::u32_k;
case 10: return scalar_kind_t::u8_k;
@@ -234,7 +234,7 @@ inline scalar_kind_t convert_pre_2_10_scalar_kind(scalar_kind_t scalar_kind) noe
/**
* @brief Fixes the metadata for pre-v2.10 versions, until we can upgrade to v3.
- * Originates from: https://github.com/unum-cloud/usearch/issues/423
+ * Originates from: https://github.com/unum-cloud/USearch/issues/423
*/
inline void fix_pre_2_10_metadata(index_dense_head_t& head) {
if (head.version_major == 2 && head.version_minor < 10) {
@@ -746,8 +746,36 @@ class index_dense_gt {
vectors_tape_allocator_.total_allocated();
}
- static constexpr std::size_t any_thread() { return std::numeric_limits::max(); }
- static constexpr distance_t infinite_distance() { return std::numeric_limits::max(); }
+ /**
+ * @brief Aggregated memory statistics for the allocator tapes used by the dense index.
+ */
+ struct memory_stats_t {
+ /// Memory stats for the graph structure allocator.
+ std::size_t graph_allocated;
+ std::size_t graph_wasted;
+ std::size_t graph_reserved;
+ /// Memory stats for the vectors data allocator.
+ std::size_t vectors_allocated;
+ std::size_t vectors_wasted;
+ std::size_t vectors_reserved;
+ };
+
+ /**
+ * @brief Returns detailed memory statistics with separate breakdowns for the graph
+ * and vectors allocator tapes.
+ * @return A `memory_stats_t` struct with per-tape allocated, wasted, and reserved bytes.
+ */
+ memory_stats_t memory_stats() const {
+ auto const& graph_alloc = typed_->tape_allocator();
+ return {
+ graph_alloc.total_allocated(), graph_alloc.total_wasted(),
+ graph_alloc.total_reserved(), vectors_tape_allocator_.total_allocated(),
+ vectors_tape_allocator_.total_wasted(), vectors_tape_allocator_.total_reserved(),
+ };
+ }
+
+ static constexpr std::size_t any_thread() { return (std::numeric_limits::max)(); }
+ static constexpr distance_t infinite_distance() { return (std::numeric_limits::max)(); }
struct aggregated_distances_t {
std::size_t count = 0;
@@ -757,47 +785,77 @@ class index_dense_gt {
};
// clang-format off
- add_result_t add(vector_key_t key, b1x8_t const* vector, std::size_t thread = any_thread(), bool copy_vector = true) { return add_(key, vector, thread, copy_vector, casts_.from.b1x8); }
- add_result_t add(vector_key_t key, i8_t const* vector, std::size_t thread = any_thread(), bool copy_vector = true) { return add_(key, vector, thread, copy_vector, casts_.from.i8); }
- add_result_t add(vector_key_t key, f16_t const* vector, std::size_t thread = any_thread(), bool copy_vector = true) { return add_(key, vector, thread, copy_vector, casts_.from.f16); }
- add_result_t add(vector_key_t key, bf16_t const* vector, std::size_t thread = any_thread(), bool copy_vector = true) { return add_(key, vector, thread, copy_vector, casts_.from.bf16); }
- add_result_t add(vector_key_t key, f32_t const* vector, std::size_t thread = any_thread(), bool copy_vector = true) { return add_(key, vector, thread, copy_vector, casts_.from.f32); }
add_result_t add(vector_key_t key, f64_t const* vector, std::size_t thread = any_thread(), bool copy_vector = true) { return add_(key, vector, thread, copy_vector, casts_.from.f64); }
+ add_result_t add(vector_key_t key, f32_t const* vector, std::size_t thread = any_thread(), bool copy_vector = true) { return add_(key, vector, thread, copy_vector, casts_.from.f32); }
+ add_result_t add(vector_key_t key, bf16_t const* vector, std::size_t thread = any_thread(), bool copy_vector = true) { return add_(key, vector, thread, copy_vector, casts_.from.bf16); }
+ add_result_t add(vector_key_t key, f16_t const* vector, std::size_t thread = any_thread(), bool copy_vector = true) { return add_(key, vector, thread, copy_vector, casts_.from.f16); }
+ add_result_t add(vector_key_t key, e5m2_t const* vector, std::size_t thread = any_thread(), bool copy_vector = true) { return add_(key, vector, thread, copy_vector, casts_.from.e5m2); }
+ add_result_t add(vector_key_t key, e4m3_t const* vector, std::size_t thread = any_thread(), bool copy_vector = true) { return add_(key, vector, thread, copy_vector, casts_.from.e4m3); }
+ add_result_t add(vector_key_t key, e3m2_t const* vector, std::size_t thread = any_thread(), bool copy_vector = true) { return add_(key, vector, thread, copy_vector, casts_.from.e3m2); }
+ add_result_t add(vector_key_t key, e2m3_t const* vector, std::size_t thread = any_thread(), bool copy_vector = true) { return add_(key, vector, thread, copy_vector, casts_.from.e2m3); }
+ add_result_t add(vector_key_t key, i8_t const* vector, std::size_t thread = any_thread(), bool copy_vector = true) { return add_(key, vector, thread, copy_vector, casts_.from.i8); }
+ add_result_t add(vector_key_t key, u8_t const* vector, std::size_t thread = any_thread(), bool copy_vector = true) { return add_(key, vector, thread, copy_vector, casts_.from.u8); }
+ add_result_t add(vector_key_t key, b1x8_t const* vector, std::size_t thread = any_thread(), bool copy_vector = true) { return add_(key, vector, thread, copy_vector, casts_.from.b1x8); }
- search_result_t search(b1x8_t const* vector, std::size_t wanted, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, dummy_predicate_t {}, thread, exact, casts_.from.b1x8); }
- search_result_t search(i8_t const* vector, std::size_t wanted, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, dummy_predicate_t {}, thread, exact, casts_.from.i8); }
- search_result_t search(f16_t const* vector, std::size_t wanted, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, dummy_predicate_t {}, thread, exact, casts_.from.f16); }
- search_result_t search(bf16_t const* vector, std::size_t wanted, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, dummy_predicate_t {}, thread, exact, casts_.from.bf16); }
- search_result_t search(f32_t const* vector, std::size_t wanted, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, dummy_predicate_t {}, thread, exact, casts_.from.f32); }
search_result_t search(f64_t const* vector, std::size_t wanted, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, dummy_predicate_t {}, thread, exact, casts_.from.f64); }
+ search_result_t search(f32_t const* vector, std::size_t wanted, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, dummy_predicate_t {}, thread, exact, casts_.from.f32); }
+ search_result_t search(bf16_t const* vector, std::size_t wanted, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, dummy_predicate_t {}, thread, exact, casts_.from.bf16); }
+ search_result_t search(f16_t const* vector, std::size_t wanted, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, dummy_predicate_t {}, thread, exact, casts_.from.f16); }
+ search_result_t search(e5m2_t const* vector, std::size_t wanted, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, dummy_predicate_t {}, thread, exact, casts_.from.e5m2); }
+ search_result_t search(e4m3_t const* vector, std::size_t wanted, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, dummy_predicate_t {}, thread, exact, casts_.from.e4m3); }
+ search_result_t search(e3m2_t const* vector, std::size_t wanted, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, dummy_predicate_t {}, thread, exact, casts_.from.e3m2); }
+ search_result_t search(e2m3_t const* vector, std::size_t wanted, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, dummy_predicate_t {}, thread, exact, casts_.from.e2m3); }
+ search_result_t search(i8_t const* vector, std::size_t wanted, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, dummy_predicate_t {}, thread, exact, casts_.from.i8); }
+ search_result_t search(u8_t const* vector, std::size_t wanted, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, dummy_predicate_t {}, thread, exact, casts_.from.u8); }
+ search_result_t search(b1x8_t const* vector, std::size_t wanted, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, dummy_predicate_t {}, thread, exact, casts_.from.b1x8); }
- template search_result_t filtered_search(b1x8_t const* vector, std::size_t wanted, predicate_at&& predicate, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, std::forward(predicate), thread, exact, casts_.from.b1x8); }
- template search_result_t filtered_search(i8_t const* vector, std::size_t wanted, predicate_at&& predicate, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, std::forward(predicate), thread, exact, casts_.from.i8); }
- template search_result_t filtered_search(f16_t const* vector, std::size_t wanted, predicate_at&& predicate, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, std::forward(predicate), thread, exact, casts_.from.f16); }
- template search_result_t filtered_search(bf16_t const* vector, std::size_t wanted, predicate_at&& predicate, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, std::forward(predicate), thread, exact, casts_.from.bf16); }
- template search_result_t filtered_search(f32_t const* vector, std::size_t wanted, predicate_at&& predicate, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, std::forward(predicate), thread, exact, casts_.from.f32); }
template search_result_t filtered_search(f64_t const* vector, std::size_t wanted, predicate_at&& predicate, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, std::forward(predicate), thread, exact, casts_.from.f64); }
+ template search_result_t filtered_search(f32_t const* vector, std::size_t wanted, predicate_at&& predicate, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, std::forward