diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 000000000..1e7d18272 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,5 @@ +root = true + +[*.yml] +indent_style = space +indent_size = 2 diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index 83cfd67af..e4c7ea161 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -84,7 +84,7 @@ body: id: duplicate attributes: label: Is there an existing issue for this? - description: Please search [our issues](https://github.com/unum-cloud/usearch/issues) to see if this bug already exists. + description: Please search [our issues](https://github.com/unum-cloud/USearch/issues) to see if this bug already exists. options: - label: I have searched the existing issues required: true @@ -92,7 +92,7 @@ body: id: terms attributes: label: Code of Conduct - description: By submitting this issue, you agree to follow our [Code of Conduct](https://github.com/unum-cloud/usearch/blob/main/CODE_OF_CONDUCT.md) + description: By submitting this issue, you agree to follow our [Code of Conduct](https://github.com/unum-cloud/USearch/blob/main/CODE_OF_CONDUCT.md) options: - label: I agree to follow this project's Code of Conduct required: true diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml index 3634918f1..5660ffc32 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.yml +++ b/.github/ISSUE_TEMPLATE/feature_request.yml @@ -9,7 +9,7 @@ body: attributes: value: | Thanks for taking the time to fill out this feature request! - We have already scheduled a few features for our [next milestone](https://github.com/unum-cloud/usearch/milestones). + We have already scheduled a few features for our [next milestone](https://github.com/unum-cloud/USearch/milestones). Still, if it is something we have yet to think about, we will happily extend our [roadmap](https://github.com/orgs/unum-cloud/projects/2). 
It is also worth noting that choosing between speed and functionality, we generally prefer the former. So in some cases, it makes more sense to build something on top of USearch rather than patch it 🤗 @@ -50,7 +50,7 @@ body: id: duplicate attributes: label: Is there an existing issue for this? - description: Please search [our issues](https://github.com/unum-cloud/usearch/issues) to see if this bug already exists. + description: Please search [our issues](https://github.com/unum-cloud/USearch/issues) to see if this bug already exists. options: - label: I have searched the existing issues required: true @@ -58,7 +58,7 @@ body: id: terms attributes: label: Code of Conduct - description: By submitting this issue, you agree to follow our [Code of Conduct](https://github.com/unum-cloud/usearch/blob/main/CODE_OF_CONDUCT.md) + description: By submitting this issue, you agree to follow our [Code of Conduct](https://github.com/unum-cloud/USearch/blob/main/CODE_OF_CONDUCT.md) options: - label: I agree to follow this project's Code of Conduct required: true diff --git a/.github/workflows/prerelease.yml b/.github/workflows/prerelease.yml index 4c92c11d0..4bd86da8c 100644 --- a/.github/workflows/prerelease.yml +++ b/.github/workflows/prerelease.yml @@ -9,11 +9,12 @@ on: env: GH_TOKEN: ${{ secrets.SEMANTIC_RELEASE_TOKEN }} PYTHONUTF8: 1 + PYTHONFAULTHANDLER: 1 PYTHON_VERSION: 3.11 DOTNET_VERSION: 8.0.x NODE_VERSION: 20 JAVA_VERSION: 21 - GO_VERSION: "^1.21.0" + GO_VERSION: "^1.22.0" ANDROID_NDK_VERSION: 26.3.11579264 ANDROID_SDK_VERSION: 21 @@ -38,10 +39,8 @@ jobs: version-file: "VERSION" update-version-in: | package.json:"version": "(\d+\.\d+\.\d+)" - package-lock.json:"usearch",\n\s+"version": "(\d+\.\d+\.\d+)" CITATION.cff:^version: (\d+\.\d+\.\d+) Cargo.toml:^version = "(\d+\.\d+\.\d+)" - Cargo.lock:name = "usearch"\nversion = "(\d+\.\d+\.\d+)" wasmer.toml:^version = "(\d+\.\d+\.\d+)" conanfile.py:version = "(\d+\.\d+\.\d+)" java/README.md:usearchVersion = 
'(\d+\.\d+\.\d+)' @@ -74,8 +73,8 @@ jobs: - name: Build C/C++ run: | sudo apt update - sudo apt install -y cmake build-essential libjemalloc-dev libomp-dev gcc-12 g++-12 - cmake -B build_artifacts -D CMAKE_BUILD_TYPE=RelWithDebInfo -D USEARCH_BUILD_TEST_CPP=1 -D USEARCH_BUILD_TEST_C=1 -D USEARCH_BUILD_LIB_C=1 -D USEARCH_BUILD_SQLITE=1 -D USEARCH_USE_OPENMP=1 -D USEARCH_USE_SIMSIMD=1 -D USEARCH_USE_JEMALLOC=1 + sudo apt install -y cmake build-essential libjemalloc-dev libomp-dev + cmake -B build_artifacts -D CMAKE_BUILD_TYPE=RelWithDebInfo -D USEARCH_BUILD_TEST_CPP=1 -D USEARCH_BUILD_TEST_C=1 -D USEARCH_BUILD_LIB_C=1 -D USEARCH_BUILD_SQLITE=1 -D USEARCH_USE_OPENMP=1 -D USEARCH_USE_NUMKONG=1 -D USEARCH_USE_JEMALLOC=1 cmake --build build_artifacts --config RelWithDebInfo - name: Test C++ run: build_artifacts/test_cpp @@ -91,7 +90,7 @@ jobs: run: | python -m pip install --upgrade pip pip install pytest pytest-repeat numpy numba cppyy - pip install --upgrade git+https://github.com/Maratyszcza/PeachPy + python -m pip install . 
- name: Test Python run: pytest @@ -102,7 +101,7 @@ jobs: with: node-version: ${{ env.NODE_VERSION }} - name: Install dependencies - run: npm ci --skip-scripts + run: npm install --skip-scripts - name: Build JavaScript run: npm run build-js - name: Test JavaScript @@ -113,7 +112,10 @@ jobs: run: | rustup update stable rustup default stable + rustup component add clippy rustc -vV + - name: Lint Rust + run: cargo clippy --all-targets -- -D warnings - name: Build Rust run: cargo build - name: Test Rust @@ -155,11 +157,11 @@ jobs: working-directory: ${{ github.workspace }}/golang/ test_ubuntu_clang: - name: Ubuntu (Clang 18) + name: Ubuntu (Clang 20) runs-on: ubuntu-24.04 env: - CC: clang - CXX: clang++ + CC: clang-20 + CXX: clang++-20 steps: - name: Checkout @@ -168,12 +170,11 @@ jobs: run: git submodule update --init --recursive # C/C++ - # Clang 16 isn't available from default repos on Ubuntu 22.04, so we have to install it manually - name: Build C/C++ run: | sudo apt update - sudo apt install -y cmake build-essential libjemalloc-dev clang - cmake -B build_artifacts -D CMAKE_BUILD_TYPE=RelWithDebInfo -D USEARCH_BUILD_TEST_CPP=1 -D USEARCH_BUILD_TEST_C=1 -D USEARCH_BUILD_LIB_C=1 -D USEARCH_BUILD_SQLITE=1 -D USEARCH_USE_OPENMP=0 -D USEARCH_USE_SIMSIMD=1 -D USEARCH_USE_JEMALLOC=1 + sudo apt install -y cmake build-essential libjemalloc-dev clang-20 + cmake -B build_artifacts -D CMAKE_BUILD_TYPE=RelWithDebInfo -D USEARCH_BUILD_TEST_CPP=1 -D USEARCH_BUILD_TEST_C=1 -D USEARCH_BUILD_LIB_C=1 -D USEARCH_BUILD_SQLITE=1 -D USEARCH_USE_OPENMP=0 -D USEARCH_USE_NUMKONG=1 -D USEARCH_USE_JEMALLOC=1 cmake --build build_artifacts --config RelWithDebInfo - name: Test C++ run: build_artifacts/test_cpp @@ -186,7 +187,7 @@ jobs: with: node-version: ${{ env.NODE_VERSION }} - name: Install dependencies - run: npm ci --skip-scripts + run: npm install --skip-scripts - name: Build JavaScript run: npm run build-js - name: Test JavaScript @@ -201,7 +202,7 @@ jobs: run: | python -m pip install 
--upgrade pip pip install pytest pytest-repeat numpy numba cppyy - pip install --upgrade git+https://github.com/Maratyszcza/PeachPy + python -m pip install . env: CXX: clang++ @@ -234,7 +235,7 @@ jobs: working-directory: ${{ github.workspace }}/csharp test_macos: - name: MacOS + name: macOS runs-on: macos-15 steps: @@ -261,7 +262,7 @@ jobs: with: node-version: ${{ env.NODE_VERSION }} - name: Install dependencies - run: npm ci --skip-scripts + run: npm install --skip-scripts - name: Build JavaScript run: npm run build-js - name: Test JavaScript @@ -314,7 +315,7 @@ jobs: working-directory: ${{ github.workspace }}/csharp test_windows_x86: - name: Windows (x86) + name: Windows (X86) runs-on: windows-2022 steps: - name: Checkout @@ -352,7 +353,7 @@ jobs: with: node-version: ${{ env.NODE_VERSION }} - name: Install dependencies - run: npm ci --skip-scripts + run: npm install --skip-scripts - name: Build JavaScript run: npm run build-js - name: Test JavaScript @@ -383,7 +384,7 @@ jobs: working-directory: ${{ github.workspace }}/csharp test_windows_arm: - name: Windows (Arm) + name: Windows (ARM) runs-on: windows-2022 steps: - name: Checkout @@ -407,8 +408,8 @@ jobs: Write-Host "ARM64 DLL built successfully" build_docker: - name: Docker - runs-on: ubuntu-22.04 + name: Build Docker + runs-on: ubuntu-24.04 steps: - name: Checkout uses: actions/checkout@v6 @@ -421,44 +422,110 @@ jobs: load: true push: false - build_wheels: - name: Build Python Wheels - runs-on: ${{ matrix.os }} - needs: - [ - test_ubuntu_gcc, - test_ubuntu_clang, - test_macos, - test_windows_x86, - test_windows_arm, - ] + build_wheels_linux: + name: Build Python Wheels (Linux) + runs-on: ubuntu-24.04 + needs: [test_ubuntu_gcc, test_ubuntu_clang] strategy: matrix: - os: [ubuntu-24.04, macos-14, windows-2022] - python-version: ["38", "39", "310", "311", "312", "313", "313t", "314", "314t"] + python-version: ["310", "311", "312", "313", "313t", "314", "314t"] steps: - name: Checkout uses: actions/checkout@v6 - 
name: Set up Python uses: actions/setup-python@v6 with: - python-version: 3.x - - # We only need QEMU for Linux builds + python-version: "3.13" - name: Setup QEMU - if: matrix.os == 'ubuntu-24.04' uses: docker/setup-qemu-action@v3 - name: Install cibuildwheel - run: python -m pip install cibuildwheel + uses: nick-fields/retry@v4 + with: + max_attempts: 3 + retry_wait_seconds: 10 + timeout_minutes: 180 + command: python -m pip install cibuildwheel + - name: Build wheels + uses: nick-fields/retry@v4 + with: + max_attempts: 2 + retry_wait_seconds: 30 + timeout_minutes: 180 + command: cibuildwheel --output-dir wheelhouse + env: + CIBW_BUILD: cp${{ matrix.python-version }}-* + CIBW_ENABLE: cpython-freethreading + CIBW_PLATFORM: linux + + build_wheels_macos: + name: Build Python Wheels (macOS) + runs-on: macos-14 + needs: [test_macos] + strategy: + matrix: + python-version: ["310", "311", "312", "313", "313t", "314", "314t"] + steps: + - name: Checkout + uses: actions/checkout@v6 + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: "3.13" + - name: Install cibuildwheel + uses: nick-fields/retry@v4 + with: + max_attempts: 3 + retry_wait_seconds: 10 + timeout_minutes: 180 + command: python -m pip install cibuildwheel - name: Build wheels - run: cibuildwheel --output-dir wheelhouse + uses: nick-fields/retry@v4 + with: + max_attempts: 2 + retry_wait_seconds: 30 + timeout_minutes: 180 + command: cibuildwheel --output-dir wheelhouse env: CIBW_BUILD: cp${{ matrix.python-version }}-* - CIBW_ENABLE: cpython-freethreading # No-GIL 3.13t builds + CIBW_ENABLE: cpython-freethreading + CIBW_PLATFORM: macos + + build_wheels_windows: + name: Build Python Wheels (Windows) + runs-on: windows-2022 + needs: [test_windows_x86] + strategy: + matrix: + python-version: ["310", "311", "312", "313", "313t", "314", "314t"] + steps: + - name: Checkout + uses: actions/checkout@v6 + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: "3.13" + - 
name: Install cibuildwheel + uses: nick-fields/retry@v4 + with: + max_attempts: 3 + retry_wait_seconds: 10 + timeout_minutes: 180 + command: python -m pip install cibuildwheel + - name: Build wheels + uses: nick-fields/retry@v4 + with: + max_attempts: 2 + retry_wait_seconds: 30 + timeout_minutes: 180 + command: cibuildwheel --output-dir wheelhouse + env: + CIBW_BUILD: cp${{ matrix.python-version }}-* + CIBW_ENABLE: cpython-freethreading + CIBW_PLATFORM: windows test_ubuntu_cross_compilation: - name: Cross Compilation - runs-on: ubuntu-22.04 + name: Cross-Compile Linux + runs-on: ubuntu-24.04 env: CC: clang CXX: clang++ @@ -512,7 +579,7 @@ jobs: test_android: name: Android - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 strategy: fail-fast: false matrix: diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 592d6b263..f7e44088b 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -11,7 +11,7 @@ env: DOTNET_VERSION: 8.0.x NODE_VERSION: 20 JAVA_VERSION: 21 - GO_VERSION: "^1.21.0" + GO_VERSION: "^1.22.0" ANDROID_NDK_VERSION: 26.3.11579264 ANDROID_SDK_VERSION: 21 EMSCRIPTEN_VERSION: 3.1.47 @@ -27,7 +27,7 @@ permissions: jobs: versioning: name: Update Version - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 steps: - name: Checkout uses: actions/checkout@v6 @@ -41,10 +41,8 @@ jobs: version-file: "VERSION" update-version-in: | package.json:"version": "(\d+\.\d+\.\d+)" - package-lock.json:"usearch",\n\s+"version": "(\d+\.\d+\.\d+)" CITATION.cff:^version: (\d+\.\d+\.\d+) Cargo.toml:^version = "(\d+\.\d+\.\d+)" - Cargo.lock:name = "usearch"\nversion = "(\d+\.\d+\.\d+)" wasmer.toml:^version = "(\d+\.\d+\.\d+)" conanfile.py:version = "(\d+\.\d+\.\d+)" java/README.md:usearchVersion = '(\d+\.\d+\.\d+)' @@ -64,9 +62,9 @@ jobs: github-token: ${{ secrets.SEMANTIC_RELEASE_TOKEN }} rebase: - name: Rebase Dev. 
Branch + name: Rebase Dev Branch needs: versioning - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 steps: - name: Checkout the latest code uses: actions/checkout@v6 @@ -88,8 +86,8 @@ jobs: force: True create_linux_deb_package: - name: Create Debian Package - runs-on: ubuntu-22.04 + name: Build Linux Package + runs-on: ubuntu-24.04 needs: versioning env: CC: clang @@ -171,7 +169,7 @@ jobs: update_latest_release: true create_windows_dll_library: - name: Create Dll Library for Windows + name: Build Windows Library runs-on: windows-2022 needs: versioning strategy: @@ -218,14 +216,20 @@ jobs: run: | cmake -DCMAKE_BUILD_TYPE=Release -DUSEARCH_BUILD_LIB_C=1 -DUSEARCH_BUILD_SQLITE=0 -DUSEARCH_BUILD_TEST_CPP=0 -DUSEARCH_BUILD_BENCH_CPP=0 -B ./build_release cmake --build ./build_release --config Release - tar -cvf usearch_windows_${{ matrix.arch }}_${{ steps.set_version.outputs.version }}.tar .\build_release\libusearch_c.dll .\c\usearch.h + mkdir pkg + copy .\build_release\libusearch_c.dll pkg\ + copy .\c\usearch.h pkg\ + tar -cvf usearch_windows_${{ matrix.arch }}_${{ steps.set_version.outputs.version }}.tar -C pkg libusearch_c.dll usearch.h - name: Build library (MSVC ARM64) if: matrix.arch == 'arm64' run: | cmake -G "Visual Studio 17 2022" -A ARM64 -DCMAKE_BUILD_TYPE=Release -DUSEARCH_BUILD_LIB_C=1 -DUSEARCH_BUILD_SQLITE=0 -DUSEARCH_BUILD_TEST_CPP=0 -DUSEARCH_BUILD_BENCH_CPP=0 -B ./build_release cmake --build ./build_release --config Release - tar -cvf usearch_windows_${{ matrix.arch }}_${{ steps.set_version.outputs.version }}.tar .\build_release\libusearch_c.dll .\c\usearch.h + mkdir pkg + copy .\build_release\libusearch_c.dll pkg\ + copy .\c\usearch.h pkg\ + tar -cvf usearch_windows_${{ matrix.arch }}_${{ steps.set_version.outputs.version }}.tar -C pkg libusearch_c.dll usearch.h - name: Upload native library for Java uses: actions/upload-artifact@v5 @@ -243,7 +247,7 @@ jobs: update_latest_release: true create_macos_dylib_library: - name: Create Library for MacOS + name: 
Build macOS Library runs-on: macos-15 needs: versioning strategy: @@ -279,8 +283,8 @@ jobs: -DUSEARCH_BUILD_BENCH_CPP=0 \ -DCMAKE_OSX_ARCHITECTURES=${{ matrix.arch }} \ -B ./build_release - cmake --build ./build_release --config Release - zip -r usearch_macos_${{ matrix.arch }}_${{ steps.set_version.outputs.version }}.zip build_release/libusearch_c.dylib c/usearch.h + cmake --build ./build_release --config Release + zip -j usearch_macos_${{ matrix.arch }}_${{ steps.set_version.outputs.version }}.zip build_release/libusearch_c.dylib c/usearch.h mv build_release/libusearch_sqlite.dylib usearch_sqlite_macos_${{ matrix.arch }}_${{ steps.set_version.outputs.version }}.dylib - name: Upload native library for Java @@ -307,8 +311,8 @@ jobs: update_latest_release: true create_android_library: - name: Create Android Libraries - runs-on: ubuntu-22.04 + name: Build Android Library + runs-on: ubuntu-24.04 needs: versioning strategy: fail-fast: false @@ -356,7 +360,7 @@ jobs: - name: Package library run: | - zip -r usearch_android_${{ matrix.arch }}_${{ steps.set_version.outputs.version }}.zip build_artifacts/libusearch_c.so c/usearch.h + zip -j usearch_android_${{ matrix.arch }}_${{ steps.set_version.outputs.version }}.zip build_artifacts/libusearch_c.so c/usearch.h - name: Upload native library for Java uses: actions/upload-artifact@v5 @@ -417,23 +421,22 @@ jobs: -DUSEARCH_BUILD_TEST_CPP=0 \ -DUSEARCH_BUILD_BENCH_CPP=0 \ -DUSEARCH_BUILD_LIB_C=0 \ - -DUSEARCH_USE_FP16LIB=1 \ - -DUSEARCH_USE_SIMSIMD=1 \ - -DSIMSIMD_TARGET_HASWELL=1 \ - -DSIMSIMD_TARGET_SKYLAKE=1 \ - -DSIMSIMD_TARGET_ICE=1 \ - -DSIMSIMD_TARGET_GENOA=1 \ - -DSIMSIMD_TARGET_SAPPHIRE=1 \ - -DSIMSIMD_TARGET_NEON=1 \ - -DSIMSIMD_TARGET_NEON_BF16=1 \ - -DSIMSIMD_TARGET_NEON_F16=1 \ - -DSIMSIMD_TARGET_NEON_I8=1 \ - -DSIMSIMD_TARGET_SVE=1 \ - -DSIMSIMD_TARGET_SVE_BF16=1 \ - -DSIMSIMD_TARGET_SVE_F16=1 \ - -DSIMSIMD_TARGET_SVE_I8=1 \ - -DSIMSIMD_TARGET_SVE2=1 \ - -DSIMSIMD_DYNAMIC_DISPATCH=1 + -DUSEARCH_USE_NUMKONG=1 \ + 
-DNK_TARGET_HASWELL=1 \ + -DNK_TARGET_SKYLAKE=1 \ + -DNK_TARGET_ICELAKE=1 \ + -DNK_TARGET_GENOA=1 \ + -DNK_TARGET_SAPPHIRE=1 \ + -DNK_TARGET_NEON=1 \ + -DNK_TARGET_NEONBFDOT=1 \ + -DNK_TARGET_NEONHALF=1 \ + -DNK_TARGET_NEONSDOT=1 \ + -DNK_TARGET_SVE=1 \ + -DNK_TARGET_SVEBFDOT=1 \ + -DNK_TARGET_SVEHALF=1 \ + -DNK_TARGET_SVESDOT=1 \ + -DNK_TARGET_SVE2=1 \ + -DNK_DYNAMIC_DISPATCH=1 cmake --build build_artifacts --config Release -j @@ -454,16 +457,16 @@ jobs: include: - arch: x64 cmake_arch: x64 - simsimd_target_x86: 1 - simsimd_target_arm: 0 + nk_target_x86: 1 + nk_target_arm: 0 - arch: x86 cmake_arch: Win32 - simsimd_target_x86: 1 - simsimd_target_arm: 0 + nk_target_x86: 1 + nk_target_arm: 0 - arch: arm64 cmake_arch: ARM64 - simsimd_target_x86: 0 - simsimd_target_arm: 1 + nk_target_x86: 0 + nk_target_arm: 1 steps: - name: Check out refreshed version @@ -487,15 +490,14 @@ jobs: -DUSEARCH_BUILD_TEST_CPP=0 ` -DUSEARCH_BUILD_BENCH_CPP=0 ` -DUSEARCH_BUILD_LIB_C=0 ` - -DUSEARCH_USE_FP16LIB=1 ` - -DUSEARCH_USE_SIMSIMD=1 ` - -DSIMSIMD_TARGET_NEON=${{ matrix.simsimd_target_arm }} ` - -DSIMSIMD_TARGET_HASWELL=${{ matrix.simsimd_target_x86 }} ` - -DSIMSIMD_TARGET_SKYLAKE=${{ matrix.simsimd_target_x86 }} ` - -DSIMSIMD_TARGET_ICE=${{ matrix.simsimd_target_x86 }} ` - -DSIMSIMD_TARGET_GENOA=0 ` - -DSIMSIMD_TARGET_SAPPHIRE=0 ` - -DSIMSIMD_DYNAMIC_DISPATCH=1 + -DUSEARCH_USE_NUMKONG=1 ` + -DNK_TARGET_NEON=${{ matrix.nk_target_arm }} ` + -DNK_TARGET_HASWELL=${{ matrix.nk_target_x86 }} ` + -DNK_TARGET_SKYLAKE=${{ matrix.nk_target_x86 }} ` + -DNK_TARGET_ICELAKE=${{ matrix.nk_target_x86 }} ` + -DNK_TARGET_GENOA=0 ` + -DNK_TARGET_SAPPHIRE=0 ` + -DNK_DYNAMIC_DISPATCH=1 cmake --build build_artifacts --config Release @@ -542,14 +544,13 @@ jobs: -DUSEARCH_BUILD_TEST_CPP=0 \ -DUSEARCH_BUILD_BENCH_CPP=0 \ -DUSEARCH_BUILD_LIB_C=0 \ - -DUSEARCH_USE_FP16LIB=1 \ - -DUSEARCH_USE_SIMSIMD=1 \ - -DSIMSIMD_TARGET_HASWELL=1 \ - -DSIMSIMD_TARGET_NEON=1 \ - -DSIMSIMD_TARGET_NEON_BF16=1 \ - 
-DSIMSIMD_TARGET_NEON_F16=1 \ - -DSIMSIMD_TARGET_NEON_I8=1 \ - -DSIMSIMD_DYNAMIC_DISPATCH=1 + -DUSEARCH_USE_NUMKONG=1 \ + -DNK_TARGET_HASWELL=1 \ + -DNK_TARGET_NEON=1 \ + -DNK_TARGET_NEONBFDOT=1 \ + -DNK_TARGET_NEONHALF=1 \ + -DNK_TARGET_NEONSDOT=1 \ + -DNK_DYNAMIC_DISPATCH=1 cmake --build build_artifacts --config Release @@ -605,13 +606,12 @@ jobs: -DUSEARCH_BUILD_TEST_CPP=0 \ -DUSEARCH_BUILD_BENCH_CPP=0 \ -DUSEARCH_BUILD_LIB_C=0 \ - -DUSEARCH_USE_FP16LIB=1 \ - -DUSEARCH_USE_SIMSIMD=1 \ - -DSIMSIMD_TARGET_NEON=1 \ - -DSIMSIMD_TARGET_NEON_BF16=1 \ - -DSIMSIMD_TARGET_NEON_F16=1 \ - -DSIMSIMD_TARGET_NEON_I8=1 \ - -DSIMSIMD_DYNAMIC_DISPATCH=1 + -DUSEARCH_USE_NUMKONG=1 \ + -DNK_TARGET_NEON=1 \ + -DNK_TARGET_NEONBFDOT=1 \ + -DNK_TARGET_NEONHALF=1 \ + -DNK_TARGET_NEONSDOT=1 \ + -DNK_DYNAMIC_DISPATCH=1 cmake --build build_artifacts --config Release @@ -623,8 +623,8 @@ jobs: retention-days: 1 create_wasm_library: - name: Create WASM Libraries - runs-on: ubuntu-22.04 + name: Build WASM Library + runs-on: ubuntu-24.04 needs: versioning steps: - name: Check out refreshed version @@ -655,14 +655,15 @@ jobs: -DUSEARCH_BUILD_BENCH_CPP=OFF \ -DUSEARCH_BUILD_SQLITE=OFF \ -DUSEARCH_USE_OPENMP=OFF \ - -DUSEARCH_USE_SIMSIMD=ON \ + -DUSEARCH_USE_NUMKONG=ON \ -DUSEARCH_USE_JEMALLOC=OFF emmake cmake --build build_wasm --config Release cd build_wasm - tar -czf ../usearch_wasm_${{ steps.set_version.outputs.version }}.tar.gz libusearch_c.* ../c/usearch.h - zip -r ../usearch_wasm_${{ steps.set_version.outputs.version }}.zip libusearch_c.* ../c/usearch.h + cp ../c/usearch.h . 
+ tar -czf ../usearch_wasm_${{ steps.set_version.outputs.version }}.tar.gz libusearch_c.* usearch.h + zip -j ../usearch_wasm_${{ steps.set_version.outputs.version }}.zip libusearch_c.* usearch.h - name: Upload archives uses: xresloader/upload-to-github-release@v1 @@ -674,7 +675,7 @@ jobs: publish_checksums: name: Compute Assets Checksums - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 needs: [ versioning, @@ -734,7 +735,8 @@ jobs: strategy: matrix: os: [ubuntu-24.04, macos-14, windows-2022] - python-version: ["38", "39", "310", "311", "312", "313", "313t", "314", "314t"] + python-version: + ["310", "311", "312", "313", "313t", "314", "314t"] steps: - name: Check out refreshed version uses: actions/checkout@v6 @@ -768,7 +770,7 @@ jobs: publish_python: name: Publish Python needs: build_wheels - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 environment: name: pypi url: https://pypi.org/p/usearch @@ -834,7 +836,7 @@ jobs: # NPM installation fails spuriously all the time - name: Install Dependencies run: | - npm ci --ignore-scripts || npm ci --ignore-scripts || npm ci --ignore-scripts + npm install --ignore-scripts - run: npm run prebuild-single if: matrix.os != 'macos-14' - run: npm run prebuild-darwin-x64+arm64 @@ -866,7 +868,13 @@ jobs: publish_javascript: name: Publish JavaScript needs: build_javascript - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 + environment: + name: npm-usearch + url: https://www.npmjs.com/package/usearch + permissions: + contents: read + id-token: write steps: - name: Check out refreshed version @@ -879,7 +887,8 @@ jobs: - name: Set up Node.js uses: actions/setup-node@v6 with: - node-version: ${{ env.NODE_VERSION }} + node-version: "24" + registry-url: "https://registry.npmjs.org" - name: Download prebuilds uses: actions/download-artifact@v6 @@ -894,7 +903,7 @@ jobs: # NPM installation fails spuriously all the time - name: Install Dependencies run: | - npm ci --ignore-scripts || npm ci --ignore-scripts || npm ci --ignore-scripts + npm 
install --ignore-scripts - name: Build the JS from TS run: npm run build-js @@ -906,10 +915,7 @@ jobs: run: npm publish --dry-run - name: Publish - uses: JS-DevTools/npm-publish@v3 - with: - token: ${{ secrets.NPM_TOKEN }} - access: public + run: npm publish --provenance --access public publish_rust: name: Publish Rust @@ -924,7 +930,7 @@ jobs: run: git submodule update --init --recursive - name: Hide submodule Crates run: | - mv simsimd/Cargo.toml simsimd/Cargo.toml.bak + mv numkong/Cargo.toml numkong/Cargo.toml.bak mv stringzilla/Cargo.toml stringzilla/Cargo.toml.bak - name: Set up Rust uses: dtolnay/rust-toolchain@stable @@ -987,7 +993,7 @@ jobs: cp usearch-jni-linux-amd64/libusearch_jni.so build/libs/usearch/shared/linux-amd64/libusearch_jni.so || true cp usearch-jni-linux-arm64/libusearch_jni.so build/libs/usearch/shared/linux-arm64/libusearch_jni.so || true - # Windows + # Windows cp usearch-jni-windows-x64/libusearch_jni.dll build/libs/usearch/shared/windows-amd64/libusearch_jni.dll || true cp usearch-jni-windows-x86/libusearch_jni.dll build/libs/usearch/shared/windows-x86/libusearch_jni.dll || true cp usearch-jni-windows-arm64/libusearch_jni.dll build/libs/usearch/shared/windows-arm64/libusearch_jni.dll || true @@ -1042,7 +1048,7 @@ jobs: update_latest_release: true publish_swift: - name: Publish ObjC & Swift + name: Publish Objective-C & Swift needs: versioning runs-on: macos-15 steps: @@ -1062,7 +1068,7 @@ jobs: publish_docker: name: Publish Docker Image needs: versioning - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 permissions: contents: read packages: write @@ -1107,7 +1113,7 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-22.04, macos-15, windows-2022] + os: [ubuntu-24.04, macos-15, windows-2022] runs-on: ${{ matrix.os }} defaults: run: @@ -1122,11 +1128,11 @@ jobs: run: git submodule update --init --recursive - name: Build C library for Linux - if: matrix.os == 'ubuntu-22.04' + if: matrix.os == 'ubuntu-24.04' run: | sudo apt update - sudo 
apt install -y cmake build-essential libjemalloc-dev libomp-dev gcc-12 g++-12 - cmake -B build_artifacts -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=gcc-12 -DCMAKE_CXX_COMPILER=g++-12 -DUSEARCH_BUILD_TEST_CPP=0 -DUSEARCH_BUILD_TEST_C=0 -DUSEARCH_BUILD_LIB_C=1 -DUSEARCH_USE_OPENMP=1 -DUSEARCH_USE_SIMSIMD=1 -DUSEARCH_USE_JEMALLOC=1 + sudo apt install -y cmake build-essential libjemalloc-dev libomp-dev + cmake -B build_artifacts -DCMAKE_BUILD_TYPE=Release -DUSEARCH_BUILD_TEST_CPP=0 -DUSEARCH_BUILD_TEST_C=0 -DUSEARCH_BUILD_LIB_C=1 -DUSEARCH_USE_OPENMP=1 -DUSEARCH_USE_NUMKONG=1 -DUSEARCH_USE_JEMALLOC=1 cmake --build build_artifacts --config Release mkdir -p "${{ github.workspace }}/csharp/lib/runtimes/linux-x64/native" cp "${{ github.workspace }}/build_artifacts/libusearch_c.so" "${{ github.workspace }}/csharp/lib/runtimes/linux-x64/native" @@ -1136,7 +1142,7 @@ jobs: run: | brew update brew reinstall cmake - cmake -B build_artifacts -DCMAKE_BUILD_TYPE=Release -DUSEARCH_BUILD_TEST_CPP=0 -DUSEARCH_BUILD_TEST_C=0 -DUSEARCH_BUILD_LIB_C=1 -DUSEARCH_USE_OPENMP=0 -DUSEARCH_USE_SIMSIMD=1 -DUSEARCH_USE_JEMALLOC=0 + cmake -B build_artifacts -DCMAKE_BUILD_TYPE=Release -DUSEARCH_BUILD_TEST_CPP=0 -DUSEARCH_BUILD_TEST_C=0 -DUSEARCH_BUILD_LIB_C=1 -DUSEARCH_USE_OPENMP=0 -DUSEARCH_USE_NUMKONG=1 -DUSEARCH_USE_JEMALLOC=0 cmake --build build_artifacts --config Release mkdir -p "${{ github.workspace }}/csharp/lib/runtimes/osx-arm64/native" cp "${{ github.workspace }}/build_artifacts/libusearch_c.dylib" "${{ github.workspace }}/csharp/lib/runtimes/osx-arm64/native" @@ -1145,7 +1151,7 @@ jobs: if: matrix.os == 'windows-2022' run: | choco install cmake - cmake -B build_artifacts -DCMAKE_BUILD_TYPE=Release -DUSEARCH_BUILD_TEST_CPP=0 -DUSEARCH_BUILD_TEST_C=0 -DUSEARCH_BUILD_LIB_C=1 -DUSEARCH_USE_OPENMP=0 -DUSEARCH_USE_SIMSIMD=0 -DUSEARCH_USE_JEMALLOC=0 + cmake -B build_artifacts -DCMAKE_BUILD_TYPE=Release -DUSEARCH_BUILD_TEST_CPP=0 -DUSEARCH_BUILD_TEST_C=0 -DUSEARCH_BUILD_LIB_C=1 
-DUSEARCH_USE_OPENMP=0 -DUSEARCH_USE_NUMKONG=0 -DUSEARCH_USE_JEMALLOC=0 cmake --build build_artifacts --config Release mkdir -p "${{ github.workspace }}\csharp\lib\runtimes\win-x64\native" cp "${{ github.workspace }}\build_artifacts\libusearch_c.dll" "${{ github.workspace }}\csharp\lib\runtimes\win-x64\native" @@ -1158,7 +1164,7 @@ jobs: publish_csharp: name: Publish C# - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 needs: [build_csharp] env: SOLUTION: ${{ github.workspace }}/csharp @@ -1203,7 +1209,7 @@ jobs: build_docs: name: Build Docs - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 if: ${{ always() }} needs: [ @@ -1224,9 +1230,9 @@ jobs: ref: "main" - name: Install dependencies run: | - sudo apt update && - sudo apt install -y doxygen graphviz dia git && - pip install -r docs/requirements.txt && + sudo apt update && + sudo apt install -y doxygen graphviz dia git && + pip install -r docs/requirements.txt && npm install -g jsdoc typedoc - name: Install USearch from PyPi run: pip install usearch @@ -1234,7 +1240,7 @@ jobs: # NPM installation fails spuriously all the time - name: Build CJS JS run: | - npm ci --ignore-scripts || npm ci --ignore-scripts || npm ci --ignore-scripts + npm install --ignore-scripts npm run build-js - name: Build documentation run: cd docs && doxygen conf.dox && make html @@ -1255,7 +1261,7 @@ jobs: environment: name: github-pages url: ${{ steps.deployment.outputs.page_url }} - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 if: ${{ always() }} needs: build_docs steps: @@ -1278,7 +1284,7 @@ jobs: deploy_docs_vercel: name: Deploy Vercel - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 if: ${{ always() }} needs: build_docs steps: diff --git a/.gitignore b/.gitignore index 95d1183ad..0251973ae 100644 --- a/.gitignore +++ b/.gitignore @@ -26,41 +26,40 @@ cmake_install.cmake gmon.out perf.data -# Python wrappers +# Python SDK __pycache__ *.egg-info dist/ wheelhouse/ - -# Python Environments venv/ +uv.lock -# JS wrappers +# JS SDK node_modules/ 
javascript/dist/ prebuilds/ - +package-lock.json # Wasm - wasi-sdk-* -# Rust builds +# Rust SDK Cargo.lock target/ -# Java bindings +# Java SDK .gradle *.class -# ObjC and Swift +# ObjC and Swift SDKs .build .swiftpm +Package.resolved -# Golang builds +# Golang SDK golang/usearch.h -# C# builds +# C# SDK csharp/**/[Bb]in/ csharp/**/[Oo]bj/ csharp/lib @@ -75,11 +74,11 @@ csharp/packages *.o *.obj -# Precompiled Headers +# Precompiled headers *.gch *.pch -# Compiled Dynamic libraries +# Compiled dynamic libraries *.so *.dylib *.dll diff --git a/.gitmodules b/.gitmodules index 362bf981f..ed1f71fca 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,9 +1,8 @@ -[submodule "simsimd"] - path = simsimd - url = https://github.com/ashvardanian/simsimd -[submodule "fp16"] - path = fp16 - url = https://github.com/maratyszcza/fp16 +[submodule "numkong"] + path = numkong + url = https://github.com/ashvardanian/NumKong + branch = main-dev [submodule "stringzilla"] path = stringzilla url = https://github.com/ashvardanian/stringzilla + branch = v3.10.10 diff --git a/.vscode/settings.json b/.vscode/settings.json index 01bccf2e2..82b9422d9 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,4 +1,8 @@ { + "[python]": { + "editor.defaultFormatter": "charliermarsh.ruff", + "editor.formatOnSave": true + }, "[go]": { "editor.defaultFormatter": "golang.go", "editor.formatOnSave": true @@ -90,6 +94,7 @@ "rtype", "SIMD", "simsimd", + "numkong", "SLOC", "Sonatype", "sorensen", @@ -116,6 +121,11 @@ "dotnet.defaultSolution": "csharp/Cloud.Unum.USearch.sln", "editor.insertSpaces": true, "editor.tabSize": 4, + "extensions.experimental.affinity": { + "ms-python.vscode-pylance": 3, + "ms-vscode.cpptools": 2, + "rust-lang.rust-analyzer": 1 + }, "files.associations": { "__availability": "cpp", "__bit_reference": "cpp", @@ -243,15 +253,33 @@ "xtree": "cpp", "xutility": "cpp" }, + "files.watcherExclude": { + "**/build_*/**": true, + "**/datasets/**": true, + "**/node_modules/**": true, + 
"**/numkong/**": true, + "**/stringzilla/**": true, + "**/target/**": true + }, "java.configuration.updateBuildConfiguration": "automatic", "java.format.enabled": false, - "java.saveActions.organizeImports": false, "prettier.singleQuote": true, "prettier.useTabs": false, "python.analysis.diagnosticSeverityOverrides": { "reportMissingImports": "none" }, - "python.testing.pytestArgs": ["python"], + "python.testing.pytestArgs": [ + "python" + ], "python.testing.pytestEnabled": true, - "python.testing.unittestEnabled": false -} + "python.testing.unittestEnabled": false, + "rust-analyzer.cargo.features": [], + "search.exclude": { + "**/build_*/**": true, + "**/datasets/**": true, + "**/node_modules/**": true, + "**/numkong/**": true, + "**/stringzilla/**": true, + "**/target/**": true + } +} \ No newline at end of file diff --git a/.vscode/tasks.json b/.vscode/tasks.json index da6a33097..c6c2bf84c 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -3,7 +3,7 @@ "tasks": [ { "label": "Linux Build Debug", - "command": "cmake -DCMAKE_CXX_COMPILER=gcc-12 -DCMAKE_CXX_COMPILER=g++-12 -DUSEARCH_BUILD_TEST_CPP=1 -DUSEARCH_BUILD_TEST_C=1 -DUSEARCH_BUILD_LIB_C=1 -DUSEARCH_USE_OPENMP=0 -DUSEARCH_USE_SIMSIMD=1 -DUSEARCH_USE_JEMALLOC=1 -DCMAKE_BUILD_TYPE=Debug -B ./build_debug && cmake --build ./build_debug --config Debug", + "command": "cmake -DCMAKE_CXX_COMPILER=gcc-12 -DCMAKE_CXX_COMPILER=g++-12 -DUSEARCH_BUILD_TEST_CPP=1 -DUSEARCH_BUILD_TEST_C=1 -DUSEARCH_BUILD_LIB_C=1 -DUSEARCH_USE_OPENMP=1 -DUSEARCH_USE_NUMKONG=1 -DUSEARCH_USE_JEMALLOC=1 -DCMAKE_BUILD_TYPE=Debug -B ./build_debug && cmake --build ./build_debug --config Debug", "args": [], "type": "shell", "problemMatcher": [ @@ -12,7 +12,7 @@ }, { "label": "Linux Build Release", - "command": "cmake -DCMAKE_CXX_COMPILER=gcc-12 -DCMAKE_CXX_COMPILER=g++-12 -DUSEARCH_BUILD_TEST_CPP=1 -DUSEARCH_BUILD_TEST_C=1 -DUSEARCH_BUILD_LIB_C=1 -DUSEARCH_USE_OPENMP=0 -DUSEARCH_USE_SIMSIMD=1 -DUSEARCH_USE_JEMALLOC=1 
-DCMAKE_BUILD_TYPE=Release -B ./build_release && cmake --build ./build_release --config RelWithDebInfo", + "command": "cmake -DCMAKE_CXX_COMPILER=gcc-12 -DCMAKE_CXX_COMPILER=g++-12 -DUSEARCH_BUILD_TEST_CPP=1 -DUSEARCH_BUILD_TEST_C=1 -DUSEARCH_BUILD_LIB_C=1 -DUSEARCH_USE_OPENMP=1 -DUSEARCH_USE_NUMKONG=1 -DUSEARCH_USE_JEMALLOC=1 -DCMAKE_BUILD_TYPE=Release -B ./build_release && cmake --build ./build_release --config RelWithDebInfo", "args": [], "type": "shell", "problemMatcher": [ @@ -21,19 +21,19 @@ }, { "label": "MacOS Build Debug", - "command": "cmake -DCMAKE_C_COMPILER=/opt/homebrew/opt/llvm/bin/clang -DCMAKE_CXX_COMPILER=/opt/homebrew/opt/llvm/bin/clang++ -DUSEARCH_BUILD_TEST_CPP=1 -DUSEARCH_BUILD_TEST_C=1 -DUSEARCH_BUILD_LIB_C=1 -DUSEARCH_USE_OPENMP=1 -DUSEARCH_USE_SIMSIMD=1 -DUSEARCH_USE_JEMALLOC=0 -DCMAKE_BUILD_TYPE=Debug -B ./build_debug && cmake --build ./build_debug --config Debug", + "command": "cmake -DCMAKE_C_COMPILER=/opt/homebrew/opt/llvm/bin/clang -DCMAKE_CXX_COMPILER=/opt/homebrew/opt/llvm/bin/clang++ -DUSEARCH_BUILD_TEST_CPP=1 -DUSEARCH_BUILD_TEST_C=1 -DUSEARCH_BUILD_LIB_C=1 -DUSEARCH_USE_OPENMP=1 -DUSEARCH_USE_NUMKONG=1 -DUSEARCH_USE_JEMALLOC=0 -DCMAKE_BUILD_TYPE=Debug -B ./build_debug && cmake --build ./build_debug --config Debug", "args": [], - "type": "shell", + "type": "shell" }, { "label": "MacOS Build Release", - "command": "cmake -DCMAKE_C_COMPILER=/opt/homebrew/opt/llvm/bin/clang -DCMAKE_CXX_COMPILER=/opt/homebrew/opt/llvm/bin/clang++ -DUSEARCH_BUILD_TEST_CPP=1 -DUSEARCH_BUILD_TEST_C=1 -DUSEARCH_BUILD_LIB_C=1 -DUSEARCH_USE_OPENMP=1 -DUSEARCH_USE_SIMSIMD=1 -DUSEARCH_USE_JEMALLOC=0 -DCMAKE_BUILD_TYPE=Release -B ./build_release && cmake --build ./build_release --config RelWithDebInfo", + "command": "cmake -DCMAKE_C_COMPILER=/opt/homebrew/opt/llvm/bin/clang -DCMAKE_CXX_COMPILER=/opt/homebrew/opt/llvm/bin/clang++ -DUSEARCH_BUILD_TEST_CPP=1 -DUSEARCH_BUILD_TEST_C=1 -DUSEARCH_BUILD_LIB_C=1 -DUSEARCH_USE_OPENMP=1 -DUSEARCH_USE_NUMKONG=1 
-DUSEARCH_USE_JEMALLOC=0 -DCMAKE_BUILD_TYPE=Release -B ./build_release && cmake --build ./build_release --config RelWithDebInfo", "args": [], "type": "shell" }, { "label": "Windows Build Debug", - "command": "cmake -DUSEARCH_BUILD_TEST_CPP=1 -DUSEARCH_BUILD_TEST_C=1 -DUSEARCH_BUILD_LIB_C=1 -DUSEARCH_USE_OPENMP=1 -DUSEARCH_USE_SIMSIMD=1 -DUSEARCH_USE_JEMALLOC=0 -DCMAKE_BUILD_TYPE=Debug -B ./build_debug && cmake --build ./build_debug --config Debug", + "command": "cmake -DUSEARCH_BUILD_TEST_CPP=1 -DUSEARCH_BUILD_TEST_C=1 -DUSEARCH_BUILD_LIB_C=1 -DUSEARCH_USE_OPENMP=1 -DUSEARCH_USE_NUMKONG=1 -DUSEARCH_USE_JEMALLOC=0 -DCMAKE_BUILD_TYPE=Debug -B ./build_debug && cmake --build ./build_debug --config Debug", "args": [], "type": "shell", "problemMatcher": [ @@ -51,7 +51,7 @@ }, { "label": "Windows Build Release", - "command": "cmake -DUSEARCH_BUILD_TEST_CPP=1 -DUSEARCH_BUILD_TEST_C=1 -DUSEARCH_BUILD_LIB_C=1 -DUSEARCH_USE_OPENMP=1 -DUSEARCH_USE_SIMSIMD=1 -DUSEARCH_USE_JEMALLOC=0 -DCMAKE_BUILD_TYPE=Release -B ./build_release && cmake --build ./build_release --config RelWithDebInfo", + "command": "cmake -DUSEARCH_BUILD_TEST_CPP=1 -DUSEARCH_BUILD_TEST_C=1 -DUSEARCH_BUILD_LIB_C=1 -DUSEARCH_USE_OPENMP=1 -DUSEARCH_USE_NUMKONG=1 -DUSEARCH_USE_JEMALLOC=0 -DCMAKE_BUILD_TYPE=Release -B ./build_release && cmake --build ./build_release --config RelWithDebInfo", "args": [], "type": "shell", "problemMatcher": [ @@ -72,7 +72,7 @@ "type": "swift", "args": [ "build", - "--build-tests", + "--build-tests" ], "env": {}, "cwd": "${workspaceFolder}", diff --git a/BENCHMARKS.md b/BENCHMARKS.md index 2efcbe532..205981825 100644 --- a/BENCHMARKS.md +++ b/BENCHMARKS.md @@ -64,8 +64,8 @@ To achieve best highest results we suggest compiling locally for the target arch ```sh git submodule update --init --recursive -cmake -USEARCH_BUILD_BENCH_CPP=1 -DUSEARCH_BUILD_TEST_C=1 -DUSEARCH_USE_OPENMP=1 -DUSEARCH_USE_SIMSIMD=1 -DCMAKE_BUILD_TYPE=RelWithDebInfo -B build_profile -cmake --build build_profile 
--config RelWithDebInfo -j +cmake -DUSEARCH_BUILD_BENCH_CPP=1 -DUSEARCH_BUILD_TEST_C=1 -DUSEARCH_USE_NUMKONG=1 -DUSEARCH_USE_OPENMP=1 -DCMAKE_BUILD_TYPE=RelWithDebInfo -B build_profile +cmake --build build_profile --config RelWithDebInfo --parallel build_profile/bench_cpp --help ``` @@ -76,15 +76,14 @@ SYNOPSIS build_profile/bench_cpp [--vectors ] [--queries ] [--neighbors ] [-o ] [-b] [-j ] [-c ] [--expansion-add ] [--expansion-search ] [--rows-skip ] - [--rows-take ] [-bf16|-f16|-i8|-b1] - [--ip|--l2sq|--cos|--hamming|--tanimoto|--sorensen|--haversine] [-h] + [--rows-take ] [--dtype ] [--metric ] [-h] OPTIONS --vectors - .[fhbd]bin file path to construct the index + .[fhbd]bin, .i8bin, .u8bin, .f32bin file path to construct the index --queries - .[fhbd]bin file path to query the index + .[fhbd]bin, .i8bin, .u8bin, .f32bin file path to query the index --neighbors .ibin file path with ground truth @@ -111,25 +110,12 @@ OPTIONS --rows-take Number of vectors to take - -bf16, --bf16quant - Enable `bf16_t` quantization + --dtype + Quantization type: f64, f32, bf16, f16, e5m2, e4m3, e3m2, e2m3, i8, u8, b1 - -f16, --f16quant - Enable `f16_t` quantization + --metric + Distance metric: ip, l2sq, cos, hamming, tanimoto, sorensen, haversine - -i8, --i8quant - Enable `i8_t` quantization - - -b1, --b1quant - Enable `b1x8_t` quantization - - --ip Choose Inner Product metric - --l2sq Choose L2 Euclidean metric - --cos Choose Angular metric - --hamming Choose Hamming metric - --tanimoto Choose Tanimoto metric - --sorensen Choose Sorensen metric - --haversine Choose Haversine metric -h, --help Print this help information on this tool and exit ``` @@ -139,14 +125,15 @@ Here is an example of running the C++ benchmark: build_profile/bench_cpp \ --vectors datasets/wiki_1M/base.1M.fbin \ --queries datasets/wiki_1M/query.public.100K.fbin \ - --neighbors datasets/wiki_1M/groundtruth.public.100K.ibin + --neighbors datasets/wiki_1M/groundtruth.public.100K.ibin \ + --dtype bf16 --metric ip 
build_profile/bench_cpp \ --vectors datasets/t2i_1B/base.1B.fbin \ --queries datasets/t2i_1B/query.public.100K.fbin \ --neighbors datasets/t2i_1B/groundtruth.public.100K.ibin \ --output datasets/t2i_1B/index.usearch \ - --cos + --dtype bf16 --metric cos ``` > Optional parameters include `connectivity`, `expansion_add`, `expansion_search`. @@ -167,33 +154,80 @@ python/scripts/bench_cluster.py --help BigANN benchmark is a good starting point, if you are searching for large collections of high-dimensional vectors. Those often come with precomputed ground-truth neighbors, which is handy for recall evaluation. +Datasets below are grouped by scale; only configurations with matching ground truth support recall evaluation. + +### ~1M Scale — Development & Testing + +| Dataset | Scalar Type | Dimensions | Metric | Base Size | Ground Truth | +| :----------------------------------------- | :---------: | :--------: | :----: | :-------: | :-----------------: | +| [Unum UForm Wiki][unum-wiki-1m] | `f32` | 256 | IP | 1 GB | 100K queries, yes | +| [Unum UForm Creative Captions][unum-cc-3m] | `f32` | 256 | IP | 3 GB | cross-modal pairing | +| [Arxiv with E5][unum-arxiv-2m] | `f32` | 768 | IP | 6 GB | cross-modal pairing | + +### ~10M Scale + +| Dataset | Scalar Type | Dimensions | Metric | Base Size | Ground Truth | +| :----------------------------------- | :---------: | :--------: | :----: | :-------: | :---------------: | +| [Meta BIGANN (SIFT)][bigann] | `u8` | 128 | L2 | 1.2 GB | 10K queries, yes | +| [Microsoft Turing-ANNS][msft-turing] | `f32` | 100 | L2 | 3.7 GB | 100K queries, yes | +| [Yandex Deep][yandex-deep] | `f32` | 96 | L2 | 3.6 GB | ¹ no subset GT | -| Dataset | Scalar Type | Dimensions | Metric | Size | -| :------------------------------------------ | :---------: | :--------: | :----: | :-------: | -| [Unum UForm Creative Captions][unum-cc-3m] | `f32` | 256 | IP | 3 GB | -| [Unum UForm Wiki][unum-wiki-1m] | `f32` | 256 | IP | 1 GB | -| [Yandex
Text-to-Image][yandex-t2i] subset | `f32` | 200 | Cos | 1 GB | -| [Yandex Deep10M][yandex-deep] subset | `f32` | 96 | L2 | 358 GB | -| [Microsoft SpaceV-100M][msft-spacev] subset | `i8` | 100 | L2 | 9.3 GB | -| | | | | | -| [Microsoft SpaceV-1B][msft-spacev] | `i8` | 100 | L2 | 93 GB | -| [Microsoft Turing-ANNS][msft-turing] | `f32` | 100 | L2 | 373 GB | -| [Yandex Deep1B][yandex-deep] | `f32` | 96 | L2 | 358 GB | -| [Yandex Text-to-Image][t2i] | `f32` | 200 | Cos | 750 GB | -| | | | | | -| [ViT-L/12 LAION][laion] | `f32` | 2048 | Cos | 2 - 10 TB | - -Luckily, smaller samples of those datasets are available. +> ¹ Yandex only publishes ground truth computed against the full 1B dataset. A `base.10M.fbin` exists for +> download but using 1B ground truth with a subset will produce misleadingly low recall. Use it only for +> throughput/latency testing, not recall evaluation. + +### ~100M Scale + +| Dataset | Scalar Type | Dimensions | Metric | Base Size | Ground Truth | +| :----------------------------------- | :---------: | :--------: | :----: | :-------: | :---------------: | +| [Meta BIGANN (SIFT)][bigann] | `u8` | 128 | L2 | 12 GB | 10K queries, yes | +| [Microsoft Turing-ANNS][msft-turing] | `f32` | 100 | L2 | 37 GB | 100K queries, yes | +| [Microsoft SpaceV][msft-spacev] | `i8` | 100 | L2 | 9.3 GB | 30K queries, yes | + +### ~1B Scale + +| Dataset | Scalar Type | Dimensions | Metric | Base Size | Ground Truth | +| :----------------------------------- | :---------: | :--------: | :----: | :-------: | :---------------: | +| [Meta BIGANN (SIFT)][bigann] | `u8` | 128 | L2 | 119 GB | 10K queries, yes | +| [Microsoft Turing-ANNS][msft-turing] | `f32` | 100 | L2 | 373 GB | 100K queries, yes | +| [Microsoft SpaceV][msft-spacev] | `i8` | 100 | L2 | 93 GB | 30K queries, yes | +| [Yandex Text-to-Image][yandex-t2i] | `f32` | 200 | Cos | 750 GB | 100K queries, yes | +| [Yandex Deep][yandex-deep] | `f32` | 96 | L2 | 358 GB | 10K queries, yes | +| | | | | | | +| [ViT-L/12
LAION][laion] | `f32` | 2048 | Cos | 2 - 10 TB | — | [unum-cc-3m]: https://huggingface.co/datasets/unum-cloud/ann-cc-3m [unum-wiki-1m]: https://huggingface.co/datasets/unum-cloud/ann-wiki-1m -[unum-t2i-1m]: https://huggingface.co/datasets/unum-cloud/ann-t2i-1m +[unum-arxiv-2m]: https://huggingface.co/datasets/unum-cloud/ann-arxiv-2m [msft-spacev]: https://github.com/ashvardanian/SpaceV [msft-turing]: https://learning2hash.github.io/publications/microsoftturinganns1B/ [yandex-t2i]: https://research.yandex.com/blog/benchmarks-for-billion-scale-similarity-search [yandex-deep]: https://research.yandex.com/blog/benchmarks-for-billion-scale-similarity-search +[bigann]: https://dl.fbaipublicfiles.com/billion-scale-ann-benchmarks/bigann/ [laion]: https://laion.ai/blog/laion-5b/#download-the-data +### Unum UForm Creative Captions + +A cross-modal dataset of ~2.75M image-text pairs embedded with UForm VL English (256 dimensions). +No separate query/ground-truth files — the natural ground truth is the image-text pairing: `image[i]` matches `text[i]`.
+ ```sh +mkdir -p datasets/cc_3M/ && \ + wget -nc https://huggingface.co/datasets/unum-cloud/ann-cc-3m/resolve/main/images.uform-vl-english.fbin \ + -O datasets/cc_3M/images.fbin && \ + wget -nc https://huggingface.co/datasets/unum-cloud/ann-cc-3m/resolve/main/texts.uform-vl-english.fbin \ + -O datasets/cc_3M/texts.fbin +``` + +To benchmark cross-modal join: + +```bash +python python/scripts/join.py \ + --vectors-a datasets/cc_3M/texts.fbin \ + --vectors-b datasets/cc_3M/images.fbin \ + --metric cos --diagnostics +``` + ### Unum UForm Wiki ```sh @@ -203,32 +237,205 @@ mkdir -p datasets/wiki_1M/ && \ wget -nc https://huggingface.co/datasets/unum-cloud/ann-wiki-1m/resolve/main/groundtruth.public.100K.ibin -P datasets/wiki_1M/ ``` +### Arxiv with E5 + +```sh +mkdir -p datasets/arxiv_2M/ && \ + wget -nc https://huggingface.co/datasets/unum-cloud/ann-arxiv-2m/resolve/main/abstract.e5-base-v2.fbin -P datasets/arxiv_2M/ && + wget -nc https://huggingface.co/datasets/unum-cloud/ann-arxiv-2m/resolve/main/title.e5-base-v2.fbin -P datasets/arxiv_2M/ +``` + ### Yandex Text-to-Image +> __Warning:__ Yandex only publishes ground truth computed against the full 1B dataset. +> A `base.1M.fbin` subset exists for download but has no matching ground truth — using 1B ground truth +> with the 1M subset will produce misleadingly low recall. Use `base.1M.fbin` only for throughput/latency +> testing, not recall evaluation.
+ ```sh mkdir -p datasets/t2i_1B/ && \ wget -nc https://storage.yandexcloud.net/yandex-research/ann-datasets/T2I/base.1B.fbin -P datasets/t2i_1B/ && - wget -nc https://storage.yandexcloud.net/yandex-research/ann-datasets/T2I/base.1M.fbin -P datasets/t2i_1B/ && wget -nc https://storage.yandexcloud.net/yandex-research/ann-datasets/T2I/query.public.100K.fbin -P datasets/t2i_1B/ && wget -nc https://storage.yandexcloud.net/yandex-research/ann-datasets/T2I/groundtruth.public.100K.ibin -P datasets/t2i_1B/ ``` -### Yandex Deep1B +To run the benchmark (requires the full 1B base for valid recall): + +```bash +build_profile/bench_cpp \ + --vectors datasets/t2i_1B/base.1B.fbin \ + --queries datasets/t2i_1B/query.public.100K.fbin \ + --neighbors datasets/t2i_1B/groundtruth.public.100K.ibin \ + --output datasets/t2i_1B/index.usearch \ + --dtype bf16 --metric cos +``` + +### Yandex Deep + +> __Warning:__ Yandex only publishes ground truth computed against the full 1B dataset. +> Smaller base files (`base.10M.fbin`, `base.1M.fbin`) are available for download but have no matching +> ground truth — using 1B ground truth with a subset will produce misleadingly low recall. Use those files +> only for throughput/latency testing, not recall evaluation.
```sh mkdir -p datasets/deep_1B/ && \ wget -nc https://storage.yandexcloud.net/yandex-research/ann-datasets/DEEP/base.1B.fbin -P datasets/deep_1B/ && - wget -nc https://storage.yandexcloud.net/yandex-research/ann-datasets/DEEP/base.10M.fbin -P datasets/deep_1B/ && wget -nc https://storage.yandexcloud.net/yandex-research/ann-datasets/DEEP/query.public.10K.fbin -P datasets/deep_1B/ && wget -nc https://storage.yandexcloud.net/yandex-research/ann-datasets/DEEP/groundtruth.public.10K.ibin -P datasets/deep_1B/ ``` -### Arxiv with E5 +To run the benchmark (requires the full 1B base for valid recall): + +```bash +build_profile/bench_cpp \ + --vectors datasets/deep_1B/base.1B.fbin \ + --queries datasets/deep_1B/query.public.10K.fbin \ + --neighbors datasets/deep_1B/groundtruth.public.10K.ibin \ + --output datasets/deep_1B/index.usearch \ + --dtype bf16 --metric l2sq +``` + +### Meta BIGANN — SIFT + +The full 1B dataset is available from Meta. No pre-sliced subset base files exist, so range requests are +used to download only the first N vectors, followed by a header patch to update the vector count. +Pre-computed ground truth is available for 10M and 100M subsets.
+ +#### 10M subset, ~1.2 GB ```sh -mkdir -p datasets/arxiv_2M/ && \ - wget -nc https://huggingface.co/datasets/unum-cloud/ann-arxiv-2m/resolve/main/abstract.e5-base-v2.fbin -P datasets/arxiv_2M/ && - wget -nc https://huggingface.co/datasets/unum-cloud/ann-arxiv-2m/resolve/main/title.e5-base-v2.fbin -P datasets/arxiv_2M/ +mkdir -p datasets/sift_10M/ && \ + wget -nc https://dl.fbaipublicfiles.com/billion-scale-ann-benchmarks/bigann/query.public.10K.u8bin -P datasets/sift_10M/ && \ + wget -nc https://dl.fbaipublicfiles.com/billion-scale-ann-benchmarks/GT_10M/bigann-10M -O datasets/sift_10M/groundtruth.public.10K.ibin && \ + wget --header="Range: bytes=0-1280000007" \ + https://dl.fbaipublicfiles.com/billion-scale-ann-benchmarks/bigann/base.1B.u8bin \ + -O datasets/sift_10M/base.10M.u8bin && \ + python3 -c " +import struct +with open('datasets/sift_10M/base.10M.u8bin', 'r+b') as f: + f.write(struct.pack('I', 10_000_000)) +" +``` + +```bash +build_profile/bench_cpp \ + --vectors datasets/sift_10M/base.10M.u8bin \ + --queries datasets/sift_10M/query.public.10K.u8bin \ + --neighbors datasets/sift_10M/groundtruth.public.10K.ibin \ + --output datasets/sift_10M/index.usearch \ + --dtype u8 --metric l2sq +``` + +#### 100M subset, ~12 GB + +```sh +mkdir -p datasets/sift_100M/ && \ + wget -nc https://dl.fbaipublicfiles.com/billion-scale-ann-benchmarks/bigann/query.public.10K.u8bin -P datasets/sift_100M/ && \ + wget -nc https://dl.fbaipublicfiles.com/billion-scale-ann-benchmarks/GT_100M/bigann-100M -O datasets/sift_100M/groundtruth.public.10K.ibin && \ + wget --header="Range: bytes=0-12800000007" \ + https://dl.fbaipublicfiles.com/billion-scale-ann-benchmarks/bigann/base.1B.u8bin \ + -O datasets/sift_100M/base.100M.u8bin && \ + python3 -c " +import struct +with open('datasets/sift_100M/base.100M.u8bin', 'r+b') as f: + f.write(struct.pack('I', 100_000_000)) +" +``` + +```bash +build_profile/bench_cpp \ + --vectors datasets/sift_100M/base.100M.u8bin \ + --queries 
datasets/sift_100M/query.public.10K.u8bin \ + --neighbors datasets/sift_100M/groundtruth.public.10K.ibin \ + --output datasets/sift_100M/index.usearch \ + --dtype u8 --metric l2sq +``` + +### Microsoft Turing-ANNS + +The full 1B dataset is ~373 GB of `f32` vectors with 100 dimensions. +Subsets can be obtained via range requests, followed by a header patch to update the vector count. +Pre-computed ground truth is available for 1M, 10M, and 100M subsets. + +#### 1M subset, ~400 MB + +```sh +mkdir -p datasets/turing_1M/ && \ + wget -nc https://comp21storage.z5.web.core.windows.net/comp21/MSFT-TURING-ANNS/query100K.fbin \ + -O datasets/turing_1M/query.public.100K.fbin && \ + wget -nc https://comp21storage.z5.web.core.windows.net/comp21/MSFT-TURING-ANNS/msturing-gt-1M \ + -O datasets/turing_1M/groundtruth.public.100K.ibin && \ + wget --header="Range: bytes=0-400000007" \ + https://comp21storage.z5.web.core.windows.net/comp21/MSFT-TURING-ANNS/base1b.fbin \ + -O datasets/turing_1M/base.1M.fbin && \ + python3 -c " +import struct +with open('datasets/turing_1M/base.1M.fbin', 'r+b') as f: + f.write(struct.pack('I', 1_000_000)) +" +``` + +```bash +build_profile/bench_cpp \ + --vectors datasets/turing_1M/base.1M.fbin \ + --queries datasets/turing_1M/query.public.100K.fbin \ + --neighbors datasets/turing_1M/groundtruth.public.100K.ibin \ + --output datasets/turing_1M/index.usearch \ + --dtype bf16 --metric l2sq +``` + +#### 10M subset, ~3.7 GB + +```sh +mkdir -p datasets/turing_10M/ && \ + wget -nc https://comp21storage.z5.web.core.windows.net/comp21/MSFT-TURING-ANNS/query100K.fbin \ + -O datasets/turing_10M/query.public.100K.fbin && \ + wget -nc https://comp21storage.z5.web.core.windows.net/comp21/MSFT-TURING-ANNS/msturing-gt-10M \ + -O datasets/turing_10M/groundtruth.public.100K.ibin && \ + wget --header="Range: bytes=0-4000000007" \ + https://comp21storage.z5.web.core.windows.net/comp21/MSFT-TURING-ANNS/base1b.fbin \ + -O datasets/turing_10M/base.10M.fbin && \ + python3 -c " 
+import struct +with open('datasets/turing_10M/base.10M.fbin', 'r+b') as f: + f.write(struct.pack('I', 10_000_000)) +" +``` + +```bash +build_profile/bench_cpp \ + --vectors datasets/turing_10M/base.10M.fbin \ + --queries datasets/turing_10M/query.public.100K.fbin \ + --neighbors datasets/turing_10M/groundtruth.public.100K.ibin \ + --output datasets/turing_10M/index.usearch \ + --dtype bf16 --metric l2sq +``` + +#### 100M subset, ~37 GB + +```sh +mkdir -p datasets/turing_100M/ && \ + wget -nc https://comp21storage.z5.web.core.windows.net/comp21/MSFT-TURING-ANNS/query100K.fbin \ + -O datasets/turing_100M/query.public.100K.fbin && \ + wget -nc https://comp21storage.z5.web.core.windows.net/comp21/MSFT-TURING-ANNS/msturing-gt-100M \ + -O datasets/turing_100M/groundtruth.public.100K.ibin && \ + wget --header="Range: bytes=0-40000000007" \ + https://comp21storage.z5.web.core.windows.net/comp21/MSFT-TURING-ANNS/base1b.fbin \ + -O datasets/turing_100M/base.100M.fbin && \ + python3 -c " +import struct +with open('datasets/turing_100M/base.100M.fbin', 'r+b') as f: + f.write(struct.pack('I', 100_000_000)) +" +``` + +```bash +build_profile/bench_cpp \ + --vectors datasets/turing_100M/base.100M.fbin \ + --queries datasets/turing_100M/query.public.100K.fbin \ + --neighbors datasets/turing_100M/groundtruth.public.100K.ibin \ + --output datasets/turing_100M/index.usearch \ + --dtype bf16 --metric l2sq ``` ### Microsoft SpaceV @@ -251,16 +458,14 @@ mkdir -p datasets/spacev_100M/ && \ wget -nc https://huggingface.co/datasets/unum-cloud/ann-spacev-100m/resolve/main/groundtruth.30K.f32bin -P datasets/spacev_100M/ ``` -To run the benchmark on those 100M datasets: - ```bash build_profile/bench_cpp \ --vectors datasets/spacev_100M/base.100M.i8bin \ --queries datasets/spacev_100M/query.30K.i8bin \ --neighbors datasets/spacev_100M/groundtruth.30K.i32bin \ --output datasets/spacev_100M/index.usearch \ - --i8quant \ - --l2sq + --dtype i8 \ + --metric l2sq ``` ## Profiling diff --git 
a/CITATION.cff b/CITATION.cff index bd942eddc..9bfe14d71 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -8,4 +8,4 @@ title: "USearch by Unum Cloud" version: 2.24.0 doi: 10.5281/zenodo.7949416 date-released: 2023-10-22 -url: "https://github.com/unum-cloud/usearch" +url: "https://github.com/unum-cloud/USearch" diff --git a/CMakeLists.txt b/CMakeLists.txt index b6e4bb483..36181a005 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,7 +6,7 @@ project( VERSION 2.24.0 LANGUAGES C CXX DESCRIPTION "Smaller & Faster Single-File Vector Search Engine from Unum" - HOMEPAGE_URL "https://github.com/unum-cloud/usearch" + HOMEPAGE_URL "https://github.com/unum-cloud/USearch" ) # Determine if USearch is built as a subproject (using `add_subdirectory`) or if it is the main project @@ -25,9 +25,8 @@ endif () option(USEARCH_INSTALL "Install CMake targets" OFF) option(USEARCH_USE_OPENMP "Use OpenMP for a thread pool" OFF) -option(USEARCH_USE_SIMSIMD "Use SimSIMD hardware-accelerated metrics" OFF) +option(USEARCH_USE_NUMKONG "Use NumKong hardware-accelerated metrics" OFF) option(USEARCH_USE_JEMALLOC "Use JeMalloc for faster memory allocations" OFF) -option(USEARCH_USE_FP16LIB "Use software emulation for half-precision types" ON) option(USEARCH_BUILD_TEST_CPP "Compile a native unit test in C++" ${USEARCH_IS_MAIN_PROJECT}) option(USEARCH_BUILD_BENCH_CPP "Compile a native benchmark in C++" ${USEARCH_IS_MAIN_PROJECT}) @@ -129,8 +128,7 @@ endif () target_compile_definitions(${USEARCH_TARGET_NAME} INTERFACE "USEARCH_USE_OPENMP=$") # Supplementary compilation settings affecting "index_plugins.hpp" -target_compile_definitions(${USEARCH_TARGET_NAME} INTERFACE "USEARCH_USE_FP16LIB=$") -target_compile_definitions(${USEARCH_TARGET_NAME} INTERFACE "USEARCH_USE_SIMSIMD=$") +target_compile_definitions(${USEARCH_TARGET_NAME} INTERFACE "USEARCH_USE_NUMKONG=$") # Define which types can be compiled target_compile_definitions( @@ -150,19 +148,12 @@ target_include_directories( ${USEARCH_TARGET_NAME} 
${USEARCH_SYSTEM_INCLUDE} INTERFACE $ $ ) -if (USEARCH_USE_FP16LIB) - target_include_directories( - ${USEARCH_TARGET_NAME} ${USEARCH_SYSTEM_INCLUDE} INTERFACE $ - $ - ) -endif () -if (USEARCH_USE_SIMSIMD) +if (USEARCH_USE_NUMKONG) target_include_directories( ${USEARCH_TARGET_NAME} ${USEARCH_SYSTEM_INCLUDE} - INTERFACE $ $ + INTERFACE $ $ ) endif () - # Install a pkg-config file, so other tools can find this configure_file("${CMAKE_CURRENT_SOURCE_DIR}/cmake/pkg-config.pc.in" "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}.pc") @@ -221,9 +212,7 @@ if (NOT CMAKE_BUILD_TYPE) endif () # Include directories -set(USEARCH_HEADER_INCLUDES "${CMAKE_CURRENT_SOURCE_DIR}/include" "${CMAKE_CURRENT_SOURCE_DIR}/fp16/include" - "${CMAKE_CURRENT_SOURCE_DIR}/simsimd/include" -) +set(USEARCH_HEADER_INCLUDES "${CMAKE_CURRENT_SOURCE_DIR}/include" "${CMAKE_CURRENT_SOURCE_DIR}/numkong/include") # Function to setup target function (setup_target TARGET_NAME) @@ -383,23 +372,27 @@ function (setup_target TARGET_NAME) target_compile_definitions(${TARGET_NAME} PRIVATE "USEARCH_USE_OPENMP=$") # Supplementary compilation settings affecting "index_plugins.hpp" - target_compile_definitions(${TARGET_NAME} PRIVATE "USEARCH_USE_FP16LIB=$") - target_compile_definitions(${TARGET_NAME} PRIVATE "USEARCH_USE_SIMSIMD=$") + target_compile_definitions(${TARGET_NAME} PRIVATE "USEARCH_USE_NUMKONG=$") - # Pass through SIMSIMD target definitions if they're set - foreach(target IN ITEMS HASWELL SKYLAKE ICE GENOA SAPPHIRE TURIN SIERRA NEON NEON_I8 NEON_F16 NEON_BF16 SVE SVE_I8 SVE_F16 SVE_BF16 SVE2) - if(DEFINED SIMSIMD_TARGET_${target}) - target_compile_definitions(${TARGET_NAME} PRIVATE "SIMSIMD_TARGET_${target}=${SIMSIMD_TARGET_${target}}") - endif() - endforeach() - - # Pass through SIMSIMD_DYNAMIC_DISPATCH if set - if(DEFINED SIMSIMD_DYNAMIC_DISPATCH) - target_compile_definitions(${TARGET_NAME} PRIVATE "SIMSIMD_DYNAMIC_DISPATCH=${SIMSIMD_DYNAMIC_DISPATCH}") - endif() + # Link NumKong: the `nk_shared` target from 
`add_subdirectory(numkong)` carries its own include paths, compile + # definitions (NK_DYNAMIC_DISPATCH, NK_TARGET_*), and ISA probes. + if (USEARCH_USE_NUMKONG AND TARGET nk_shared) + target_compile_definitions(${TARGET_NAME} PRIVATE "NK_DYNAMIC_DISPATCH=1") + target_link_libraries(${TARGET_NAME} PRIVATE nk_shared) + endif () endfunction () +# Delegate NumKong compilation to its own CMakeLists.txt — it handles ISA detection, NK_TARGET_* flags, and SIMD +# dispatch backends internally. +if (USEARCH_USE_NUMKONG) + set(NK_BUILD_SHARED + ON + CACHE BOOL "" FORCE + ) + add_subdirectory(numkong) +endif () + # Must be called before "add_subdirectory()". See # https://stackoverflow.com/questions/30250494/ctest-not-detecting-tests. enable_testing() diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c4cfc937f..07605fcc5 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -14,7 +14,7 @@ To keep the quality of the code high, we have a set of guidelines common to [all ## Before you start Before building the first time, please pull `git` submodules. -That's how we bring in SimSIMD and other optional dependencies to test all of the available functionality. +That's how we bring in NumKong and other optional dependencies to test all of the available functionality. ```sh git submodule update --init --recursive @@ -26,7 +26,7 @@ Our primary C++ implementation uses CMake for builds.
If this is your first experience with CMake, use the following commands to get started: ```sh -sudo apt-get update && sudo apt-get install cmake build-essential libjemalloc-dev g++-12 gcc-12 # Ubuntu +sudo apt-get update && sudo apt-get install cmake build-essential libjemalloc-dev # Ubuntu brew install libomp llvm # macOS ``` @@ -64,14 +64,13 @@ The CMakeLists.txt file has a number of options you can pass: - `USEARCH_BUILD_SQLITE` - build the SQLite extension ([no Windows](https://gist.github.com/zeljic/d8b542788b225b1bcb5fce169ee28c55)) - Which dependencies to use: - `USEARCH_USE_OPENMP` - use OpenMP for parallelism - - `USEARCH_USE_SIMSIMD` - use SimSIMD for vectorization + - `USEARCH_USE_NUMKONG` - use NumKong for vectorization - `USEARCH_USE_JEMALLOC` - use Jemalloc for memory management - - `USEARCH_USE_FP16LIB` - use software emulation for half-precision floating point Putting all of this together, compiling all targets on most platforms should work with the following snippet: ```sh -cmake -D CMAKE_BUILD_TYPE=Release -D USEARCH_USE_FP16LIB=1 -D USEARCH_USE_OPENMP=1 -D USEARCH_USE_SIMSIMD=1 -D USEARCH_USE_JEMALLOC=1 -D USEARCH_BUILD_TEST_CPP=1 -D USEARCH_BUILD_BENCH_CPP=1 -D USEARCH_BUILD_LIB_C=1 -D USEARCH_BUILD_TEST_C=1 -D USEARCH_BUILD_SQLITE=0 -B build_release +cmake -D CMAKE_BUILD_TYPE=Release -D USEARCH_USE_NUMKONG=1 -D USEARCH_USE_OPENMP=1 -D USEARCH_USE_JEMALLOC=1 -D USEARCH_BUILD_TEST_CPP=1 -D USEARCH_BUILD_BENCH_CPP=1 -D USEARCH_BUILD_LIB_C=1 -D USEARCH_BUILD_TEST_C=1 -D USEARCH_BUILD_SQLITE=0 -B build_release cmake --build build_release --config Release build_release/test_cpp @@ -86,9 +85,8 @@ cmake \ -D CMAKE_BUILD_TYPE=Release \ -D CMAKE_C_COMPILER="$(brew --prefix llvm)/bin/clang" \ -D CMAKE_CXX_COMPILER="$(brew --prefix llvm)/bin/clang++" \ - -D USEARCH_USE_FP16LIB=1 \ + -D USEARCH_USE_NUMKONG=1 \ -D USEARCH_USE_OPENMP=1 \ - -D USEARCH_USE_SIMSIMD=1 \ -D USEARCH_USE_JEMALLOC=1 \ -D USEARCH_BUILD_TEST_CPP=1 \ -D USEARCH_BUILD_BENCH_CPP=1 \ @@ 
-178,8 +176,7 @@ cmake -B build_artifacts \ -D USEARCH_BUILD_LIB_C=1 \ -D USEARCH_BUILD_TEST_CPP=0 \ -D USEARCH_BUILD_BENCH_CPP=0 \ - -D USEARCH_USE_SIMSIMD=0 \ - -D USEARCH_USE_FP16LIB=1 + -D USEARCH_USE_NUMKONG=0 cmake --build build_artifacts --config RelWithDebInfo file build_artifacts/libusearch_c.so # Verify the output @@ -214,7 +211,7 @@ Following options are enabled: - The `-p no:warnings` option will suppress and allow warnings. ```sh -uv pip install pytest pytest-repeat numpy # for repeated fuzzy tests +uv pip install -e . --group tests # for repeated fuzzy tests python -m pytest # if you trust the default settings python -m pytest python/scripts/ -s -x -p no:warnings # to overwrite the default settings ``` @@ -222,8 +219,8 @@ python -m pytest python/scripts/ -s -x -p no:warnings # to overwrite the default Linting: ```sh -pip install ruff -ruff --format=github --select=E9,F63,F7,F82 --target-version=py310 python +uv pip install -e . --group lint +ruff --format=github python ``` Before merging your changes you may want to test your changes against the entire matrix of Python versions USearch supports. 
@@ -281,14 +278,13 @@ RUN npm init -y RUN yum install tar git python3 cmake gcc-c++ -y && yum groupinstall "Development Tools" -y # Assuming AWS Linux 2 uses old compilers: -ENV USEARCH_USE_FP16LIB 1 -ENV USEARCH_USE_SIMSIMD 1 -ENV SIMSIMD_TARGET_HASWELL 1 -ENV SIMSIMD_TARGET_SKYLAKE 0 -ENV SIMSIMD_TARGET_ICE 0 -ENV SIMSIMD_TARGET_SAPPHIRE 0 -ENV SIMSIMD_TARGET_NEON 1 -ENV SIMSIMD_TARGET_SVE 0 +ENV USEARCH_USE_NUMKONG 1 +ENV NK_TARGET_HASWELL 1 +ENV NK_TARGET_SKYLAKE 0 +ENV NK_TARGET_ICELAKE 0 +ENV NK_TARGET_SAPPHIRE 0 +ENV NK_TARGET_NEON 1 +ENV NK_TARGET_SVE 0 # For specific PR: # RUN npm install --build-from-source unum-cloud/usearch#pull/302/head @@ -332,16 +328,16 @@ The reason for that is the heuristic that Cargo uses to determine the files to i > Regardless of whether exclude or include is specified, the following files are always excluded: > Any sub-packages will be skipped (any subdirectory that contains a Cargo.toml file). -Assuming both SimSIMD and StringZilla contain their own `Cargo.toml` files, we need to temporarily exclude them from the package. +Assuming both NumKong and StringZilla contain their own `Cargo.toml` files, we need to temporarily exclude them from the package. ```sh -mv simsimd/Cargo.toml simsimd/Cargo.toml.bak +mv numkong/Cargo.toml numkong/Cargo.toml.bak mv stringzilla/Cargo.toml stringzilla/Cargo.toml.bak cargo package --list --allow-dirty cargo publish # Revert back -mv simsimd/Cargo.toml.bak simsimd/Cargo.toml +mv numkong/Cargo.toml.bak numkong/Cargo.toml mv stringzilla/Cargo.toml.bak stringzilla/Cargo.toml ``` @@ -388,7 +384,7 @@ USearch provides Go bindings, that depend on the C library that must be installe So one should first compile the C library, link it with Go, and only then run tests. 
```sh -cmake -B build_release -D USEARCH_BUILD_LIB_C=1 -D USEARCH_BUILD_TEST_C=1 -D USEARCH_USE_OPENMP=1 -D USEARCH_USE_SIMSIMD=1 +cmake -B build_release -D USEARCH_BUILD_LIB_C=1 -D USEARCH_BUILD_TEST_C=1 -D USEARCH_USE_NUMKONG=1 -D USEARCH_USE_OPENMP=1 cmake --build build_release --config Release -j cp build_release/libusearch_c.so golang/ # or .dylib to install the library on macOS @@ -452,8 +448,7 @@ g++ -shared -o java\cloud\unum\usearch\USearchJNI.dll java\cloud\unum\usearch\cl # macOS g++ -std=c++11 -c -fPIC \ -Iinclude \ - -Ifp16/include \ - -Isimsimd/include \ + -Inumkong/include \ -I${JAVA_HOME}/include -I${JAVA_HOME}/include/darwin java/cloud/unum/usearch/cloud_unum_usearch_Index.cpp -o java/cloud/unum/usearch/cloud_unum_usearch_Index.o g++ -dynamiclib -o java/cloud/unum/usearch/libusearch.dylib java/cloud/unum/usearch/cloud_unum_usearch_Index.o -lc @@ -480,7 +475,7 @@ USearch provides CSharp bindings, that depend on the C library that must be inst So one should first compile the C library, link it with CSharp, and only then run tests. ```sh -cmake -B build_artifacts -D USEARCH_BUILD_LIB_C=1 -D USEARCH_BUILD_TEST_C=1 -D USEARCH_USE_OPENMP=1 -D USEARCH_USE_SIMSIMD=1 +cmake -B build_artifacts -D USEARCH_BUILD_LIB_C=1 -D USEARCH_BUILD_TEST_C=1 -D USEARCH_USE_NUMKONG=1 -D USEARCH_USE_OPENMP=1 cmake --build build_artifacts --config Release -j ``` @@ -561,8 +556,8 @@ cmake -DCMAKE_TOOLCHAIN_FILE=${WASI_SDK_PATH}/share/cmake/wasi-sdk.cmake . ## Working on Sub-Modules -Extending metrics in SimSIMD: +Extending metrics in NumKong: ```sh -git push --set-upstream https://github.com/ashvardanian/simsimd.git HEAD:main +git push --set-upstream https://github.com/ashvardanian/numkong.git HEAD:main ``` diff --git a/Cargo.lock b/Cargo.lock deleted file mode 100644 index ee7e3d409..000000000 --- a/Cargo.lock +++ /dev/null @@ -1,495 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. 
-version = 4 - -[[package]] -name = "anstyle" -version = "1.0.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd" - -[[package]] -name = "autocfg" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" - -[[package]] -name = "bitflags" -version = "2.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" - -[[package]] -name = "cc" -version = "1.2.29" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c1599538de2394445747c8cf7935946e3cc27e9625f889d979bfb2aaf569362" -dependencies = [ - "shlex", -] - -[[package]] -name = "cfg-if" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" - -[[package]] -name = "clap" -version = "4.5.40" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40b6887a1d8685cebccf115538db5c0efe625ccac9696ad45c409d96566e910f" -dependencies = [ - "clap_builder", -] - -[[package]] -name = "clap_builder" -version = "4.5.40" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0c66c08ce9f0c698cbce5c0279d0bb6ac936d8674174fe48f736533b964f59e" -dependencies = [ - "anstyle", - "clap_lex", - "strsim", -] - -[[package]] -name = "clap_lex" -version = "0.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675" - -[[package]] -name = "codespan-reporting" -version = "0.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe6d2e5af09e8c8ad56c969f2157a3d4238cebc7c55f0a517728c38f7b200f81" -dependencies = [ - "serde", - "termcolor", - "unicode-width", -] - 
-[[package]] -name = "cxx" -version = "1.0.160" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be1149bab7a5580cb267215751389597c021bfad13c0bb00c54e19559333764c" -dependencies = [ - "cc", - "cxxbridge-cmd", - "cxxbridge-flags", - "cxxbridge-macro", - "foldhash", - "link-cplusplus", -] - -[[package]] -name = "cxx-build" -version = "1.0.160" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6aeeaf1aefae8e0f5141920a7ecbc64a22ab038d4b4ac59f2d19e0effafd5b53" -dependencies = [ - "cc", - "codespan-reporting", - "indexmap", - "proc-macro2", - "quote", - "scratch", - "syn", -] - -[[package]] -name = "cxxbridge-cmd" -version = "1.0.160" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c36ac1f9a72064b1f41fd7b49a4c1b3bf33b9ccb1274874dda6d264f57c55964" -dependencies = [ - "clap", - "codespan-reporting", - "indexmap", - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "cxxbridge-flags" -version = "1.0.160" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "170c6ff5d009663866857a91ebee55b98ea4d4b34e7d7aba6dc4a4c95cc7b748" - -[[package]] -name = "cxxbridge-macro" -version = "1.0.160" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4984a142211026786011a7e79fa22faa1eca1e9cbf0e60bffecfd57fd3db88f1" -dependencies = [ - "indexmap", - "proc-macro2", - "quote", - "rustversion", - "syn", -] - -[[package]] -name = "equivalent" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" - -[[package]] -name = "foldhash" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" - -[[package]] -name = "fork_union" -version = "2.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"ec1c7f61ff17f21416e5fece8704d86ca9c9b09d189c176ce9067e2b75f39b38" -dependencies = [ - "cc", -] - -[[package]] -name = "getrandom" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" -dependencies = [ - "cfg-if", - "libc", - "r-efi", - "wasi", -] - -[[package]] -name = "hashbrown" -version = "0.15.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5" - -[[package]] -name = "indexmap" -version = "2.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe4cd85333e22411419a0bcae1297d25e58c9443848b11dc6a86fefe8c78a661" -dependencies = [ - "equivalent", - "hashbrown", -] - -[[package]] -name = "libc" -version = "0.2.174" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776" - -[[package]] -name = "libm" -version = "0.2.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" - -[[package]] -name = "link-cplusplus" -version = "1.0.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a6f6da007f968f9def0d65a05b187e2960183de70c160204ecfccf0ee330212" -dependencies = [ - "cc", -] - -[[package]] -name = "num-traits" -version = "0.2.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" -dependencies = [ - "autocfg", - "libm", -] - -[[package]] -name = "ppv-lite86" -version = "0.2.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" -dependencies = [ - "zerocopy", -] - -[[package]] -name = "proc-macro2" -version = "1.0.95" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" -dependencies = [ - "unicode-ident", -] - -[[package]] -name = "quote" -version = "1.0.40" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" -dependencies = [ - "proc-macro2", -] - -[[package]] -name = "r-efi" -version = "5.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" - -[[package]] -name = "rand" -version = "0.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97" -dependencies = [ - "rand_chacha", - "rand_core", -] - -[[package]] -name = "rand_chacha" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" -dependencies = [ - "ppv-lite86", - "rand_core", -] - -[[package]] -name = "rand_core" -version = "0.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" -dependencies = [ - "getrandom", -] - -[[package]] -name = "rand_distr" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a8615d50dcf34fa31f7ab52692afec947c4dd0ab803cc87cb3b0b4570ff7463" -dependencies = [ - "num-traits", - "rand", -] - -[[package]] -name = "rustversion" -version = "1.0.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d" - -[[package]] -name = "scratch" -version = "1.0.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f6280af86e5f559536da57a45ebc84948833b3bee313a7dd25232e09c878a52" - -[[package]] 
-name = "serde" -version = "1.0.219" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" -dependencies = [ - "serde_derive", -] - -[[package]] -name = "serde_derive" -version = "1.0.219" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "shlex" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" - -[[package]] -name = "strsim" -version = "0.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" - -[[package]] -name = "syn" -version = "2.0.104" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "termcolor" -version = "1.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" -dependencies = [ - "winapi-util", -] - -[[package]] -name = "unicode-ident" -version = "1.0.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" - -[[package]] -name = "unicode-width" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a1a07cc7db3810833284e8d372ccdc6da29741639ecc70c9ec107df0fa6154c" - -[[package]] -name = "usearch" -version = "2.24.0" -dependencies = [ - "cxx", - "cxx-build", - "fork_union", - "rand", - "rand_chacha", - "rand_distr", -] - -[[package]] -name = "wasi" -version = "0.14.2+wasi-0.2.4" 
-source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" -dependencies = [ - "wit-bindgen-rt", -] - -[[package]] -name = "winapi-util" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" -dependencies = [ - "windows-sys", -] - -[[package]] -name = "windows-sys" -version = "0.59.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" -dependencies = [ - "windows-targets", -] - -[[package]] -name = "windows-targets" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" -dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_gnullvm", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", -] - -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" - -[[package]] -name = "windows_aarch64_msvc" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" - -[[package]] -name = "windows_i686_gnu" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" - -[[package]] -name = "windows_i686_gnullvm" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" - -[[package]] -name = "windows_i686_msvc" -version = 
"0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" - -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" - -[[package]] -name = "wit-bindgen-rt" -version = "0.39.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" -dependencies = [ - "bitflags", -] - -[[package]] -name = "zerocopy" -version = "0.8.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f" -dependencies = [ - "zerocopy-derive", -] - -[[package]] -name = "zerocopy-derive" -version = "0.8.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] diff --git a/Cargo.toml b/Cargo.toml index a6c93838f..5f4043481 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,41 +1,41 @@ [package] -name = "usearch" -version = "2.24.0" authors = ["Ash Vardanian <1983160+ashvardanian@users.noreply.github.com>"] description = "Smaller & Faster Single-File Vector Search Engine from Unum" +documentation = "https://unum-cloud.github.io/USearch" edition = "2021" -license = "Apache-2.0" -publish = true -repository = 
"https://github.com/unum-cloud/usearch" -documentation = "https://unum-cloud.github.io/usearch" include = [ - "/rust/**", - "/include/**", - "/fp16/include/**", - "/simsimd/include/**", - "/simsimd/c/**", - "/stringzilla/include/**", - "/build.rs", + "/rust/**", + "/include/**", + "/stringzilla/include/**", + "/build.rs", ] +license = "Apache-2.0" +name = "usearch" +publish = true +repository = "https://github.com/unum-cloud/USearch" +version = "2.24.0" [features] -default = ["simsimd", "fp16lib"] # SimSIMD is enabled by default -simsimd = [] # No need to do anything to enable SimSIMD by default -fp16lib = [] # Without this FP16 we lose precision downcasting -openmp = [] # Optional: Users can enable OpenMP +default = ["numkong"] +numkong = ["dep:numkong"] +openmp = [] +simsimd = ["numkong"] [lib] name = "usearch" path = "rust/lib.rs" [dependencies] -cxx = "1.0.160" +cxx = "1.0.194" +# Published crates pull `numkong` from crates.io (the `path` is stripped at publish time). +# The local `path = "numkong"` is only used during development against the git submodule. 
+numkong = {version = ">=7.5.0", path = "numkong", optional = true} [build-dependencies] -cxx-build = "1.0.160" +cxx-build = "1.0.194" [dev-dependencies] -fork_union = "2.1.1" # for concurrency tests -rand_distr = "0.5.1" # uniform floats distribution -rand_chacha = "0.9.0" # random number generator -rand = "0.9.1" +fork_union = "2.1.1" # for concurrency tests +rand = "0.10.0" +rand_chacha = "0.10.0" # random number generator +rand_distr = "0.6.0" # uniform floats distribution diff --git a/Package.swift b/Package.swift index 646dff293..95da96c9c 100644 --- a/Package.swift +++ b/Package.swift @@ -4,10 +4,10 @@ import PackageDescription let cxxSettings: [CXXSetting] = [ .headerSearchPath("../include/"), - .headerSearchPath("../fp16/include/"), - .headerSearchPath("../simsimd/include/"), - .define("USEARCH_USE_FP16LIB", to: "1"), - .define("USEARCH_USE_SIMSIMD", to: "1"), + .define("USEARCH_USE_NUMKONG", to: "1"), + .define("NK_DYNAMIC_DISPATCH", to: "1"), + .define("NK_NATIVE_F16", to: "0"), + .define("NK_NATIVE_BF16", to: "0"), ] var targets: [Target] = [] @@ -17,8 +17,11 @@ var targets: [Target] = [] targets.append( .target( name: "USearchObjectiveC", + dependencies: [ + .product(name: "CNumKongDispatch", package: "NumKong"), + ], path: "objc", - sources: ["USearchObjective.mm", "../simsimd/c/lib.c"], + sources: ["USearchObjective.mm"], cxxSettings: cxxSettings ) ) @@ -28,6 +31,9 @@ var targets: [Target] = [] targets += [ .target( name: "USearchC", + dependencies: [ + .product(name: "CNumKongDispatch", package: "NumKong"), + ], path: "c", sources: ["usearch.h", "lib.cpp"], publicHeadersPath: ".", @@ -70,8 +76,16 @@ products.append( let package = Package( name: "USearch", + platforms: [ + .macOS(.v12), + .iOS(.v15), + .watchOS(.v8), + .tvOS(.v15), + ], products: products, - dependencies: [], + dependencies: [ + .package(url: "https://github.com/ashvardanian/NumKong", from: "7.5.0"), + ], targets: targets, cxxLanguageStandard: CXXLanguageStandard.cxx11 ) diff --git 
a/README.md b/README.md index 147a93d25..0a1319d99 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@

USearch

Smaller & Faster Single-File
-Similarity Search & Clustering Engine for Vectors & 🔜 Texts +Similarity Search & Clustering Engine for Vectors & 🔜 Texts


@@ -14,7 +14,7 @@ Similarity Search & Clustering Engine for Blog     -GitHub +GitHub

@@ -61,7 +61,7 @@ Linux • macOS • Windows • iOS • Android • WebAssembly • - 🔄 Near-real-time [clustering and sub-clustering](#clustering) for Tens or Millions of clusters. [faiss]: https://github.com/facebookresearch/faiss -[usearch-header]: https://github.com/unum-cloud/usearch/blob/main/include/usearch/index.hpp +[usearch-header]: https://github.com/unum-cloud/USearch/blob/main/include/usearch/index.hpp [obscure-use-cases]: https://ashvardanian.com/posts/abusing-vector-search [hnsw-algorithm]: https://arxiv.org/abs/1603.09320 [simd]: https://en.wikipedia.org/wiki/Single_instruction,_multiple_data @@ -185,8 +185,8 @@ one_in_many: Matches = search(vectors, vector, 50, MetricKind.L2sq, exact=True) many_in_many: BatchMatches = search(vectors, vectors, 50, MetricKind.L2sq, exact=True) ``` -If you pass the `exact=True` argument, the system bypasses indexing altogether and performs a brute-force search through the entire dataset using SIMD-optimized similarity metrics from [SimSIMD](https://github.com/ashvardanian/simsimd). -When compared to FAISS's `IndexFlatL2` in Google Colab, __[USearch may offer up to a 20x performance improvement](https://github.com/unum-cloud/usearch/issues/176#issuecomment-1666650778)__: +If you pass the `exact=True` argument, the system bypasses indexing altogether and performs a brute-force search through the entire dataset using SIMD-optimized similarity metrics from [NumKong](https://github.com/ashvardanian/numkong). +When compared to FAISS's `IndexFlatL2` in Google Colab, __[USearch may offer up to a 20x performance improvement](https://github.com/unum-cloud/USearch/issues/176#issuecomment-1666650778)__: - `faiss.IndexFlatL2`: __55.3 ms__. - `usearch.index.search`: __2.54 ms__. @@ -268,7 +268,7 @@ As part of the quantization process, the vectors are normalized to unit length a When quantizing to `b1x8_t` single-bit representations, note that it's only valid for binary metrics like Jaccard, Hamming, etc.
As part of the quantization process, the scalar components greater than zero are set to `true`, and the rest to `false`. -![USearch uint40_t support](https://github.com/unum-cloud/usearch/blob/main/assets/usearch-neighbor-types.png?raw=true) +![USearch uint40_t support](https://github.com/unum-cloud/USearch/blob/main/assets/usearch-neighbor-types.png?raw=true) Using smaller numeric types will save you RAM needed to store the vectors, but you can also compress the neighbors lists forming our proximity graphs. By default, 32-bit `uint32_t` is used to enumerate those, which is not enough if you need to address over 4 Billion entries. @@ -533,15 +533,15 @@ index = Index(ndim=ndim, metric=CompiledMetric( - [x] ScyllaDB: [Rust](https://github.com/scylladb/vector-store), [presentation](https://www.slideshare.net/slideshow/vector-search-with-scylladb-by-szymon-wasik/276571548). - [x] TiDB & TiFlash: [C++](https://github.com/pingcap/tiflash), [announcement](https://www.pingcap.com/article/introduce-vector-search-indexes-in-tidb/). - [x] YugaByte: [C++](https://github.com/yugabyte/yugabyte-db/blob/366b9f5e3c4df3a1a17d553db41d6dc50146f488/src/yb/vector_index/usearch_wrapper.cc). -- [x] Google: [UniSim](https://github.com/google/unisim), [RetSim](https://arxiv.org/abs/2311.17264) paper. - [x] MemGraph: [C++](https://github.com/memgraph/memgraph/blob/784dd8520f65050d033aea8b29446e84e487d091/src/storage/v2/indices/vector_index.cpp), [announcement](https://memgraph.com/blog/simplify-data-retrieval-memgraph-vector-search). -- [x] LanternDB: [C++](https://github.com/lanterndata/lantern), [Rust](https://github.com/lanterndata/lantern_extras), [docs](https://lantern.dev/blog/hnsw-index-creation). +- [x] Google: [UniSim](https://github.com/google/unisim), [RetSim](https://arxiv.org/abs/2311.17264) paper. - [x] LangChain: [Python](https://github.com/langchain-ai/langchain/releases/tag/v0.0.257) and [JavaScript](https://github.com/hwchase17/langchainjs/releases/tag/0.0.125). 
- [x] Microsoft Semantic Kernel: [Python](https://github.com/microsoft/semantic-kernel/releases/tag/python-0.3.9.dev) and C#. - [x] GPTCache: [Python](https://github.com/zilliztech/GPTCache/releases/tag/0.1.29). - [x] Sentence-Transformers: Python [docs](https://www.sbert.net/docs/package_reference/quantization.html#sentence_transformers.quantization.semantic_search_usearch). - [x] Pathway: [Rust](https://github.com/pathwaycom/pathway). - [x] Vald: [GoLang](https://github.com/vdaas/vald). +- [x] MatrixOne: [GoLang](https://github.com/matrixorigin/matrixone). ## Citations @@ -551,7 +551,7 @@ index = Index(ndim=ndim, metric=CompiledMetric( doi = {10.5281/zenodo.7949416}, author = {Vardanian, Ash}, title = {{USearch by Unum Cloud}}, -url = {https://github.com/unum-cloud/usearch}, +url = {https://github.com/unum-cloud/USearch}, version = {2.24.0}, year = {2023}, month = oct, diff --git a/binding.gyp b/binding.gyp index dfe10d820..c62b5b869 100644 --- a/binding.gyp +++ b/binding.gyp @@ -1,8 +1,16 @@ { + "variables": { + "numkong_root": "./numkong", + }, + "includes": ["./numkong/numkong.gypi"], "targets": [ { "target_name": "usearch", - "sources": ["javascript/lib.cpp", "simsimd/c/lib.c"], + "sources": ["javascript/lib.cpp"], + "dependencies": [ + " Result<(), Box> { let mut build = cxx_build::bridge("rust/lib.rs"); build @@ -6,59 +12,45 @@ fn main() { .flag_if_supported("-Wno-unknown-pragmas") .warnings(false) .include("include") - .include("rust") - .include("fp16/include") - .include("simsimd/include"); + .include("rust"); // Check for optional features if cfg!(feature = "openmp") { build.define("USEARCH_USE_OPENMP", "1"); + let target_os = std::env::var("CARGO_CFG_TARGET_OS").unwrap_or_default(); + if target_os == "windows" { + build.flag_if_supported("/openmp"); + } else { + build.flag_if_supported("-fopenmp"); + println!("cargo:rustc-link-lib=dylib=omp"); + } } else { build.define("USEARCH_USE_OPENMP", "0"); } - if cfg!(feature = "fp16lib") { - 
build.define("USEARCH_USE_FP16LIB", "1"); - } else { - build.define("USEARCH_USE_FP16LIB", "0"); - } - - // Define all possible SIMD targets as 1 - let target_arch = std::env::var("CARGO_CFG_TARGET_ARCH").unwrap_or_default(); - - let mut flags_to_try; - if cfg!(feature = "simsimd") { + // When the `numkong` feature is enabled, the `numkong` crate (pulled from crates.io, + // not the local git submodule) compiles all SIMD kernels itself, with dynamic dispatch + // and fallback across ISA backends. We only need its include path for the C++ headers. + if cfg!(feature = "numkong") { + let numkong_include = std::env::var("DEP_NUMKONG_INCLUDE") + .map_err(|_| "numkong crate must set DEP_NUMKONG_INCLUDE via `links` metadata")?; build - .file("simsimd/c/lib.c") - .define("USEARCH_USE_SIMSIMD", "1") - .define("SIMSIMD_DYNAMIC_DISPATCH", "1") - .define("SIMSIMD_NATIVE_BF16", "0") - .define("SIMSIMD_NATIVE_F16", "0"); - flags_to_try = match target_arch.as_str() { - "arm" | "aarch64" => vec![ - "SIMSIMD_TARGET_NEON", - "SIMSIMD_TARGET_NEON_I8", - "SIMSIMD_TARGET_NEON_F16", - "SIMSIMD_TARGET_NEON_BF16", - "SIMSIMD_TARGET_SVE", - "SIMSIMD_TARGET_SVE_I8", - "SIMSIMD_TARGET_SVE_F16", - "SIMSIMD_TARGET_SVE_BF16", - ], - _ => vec![ - "SIMSIMD_TARGET_HASWELL", - "SIMSIMD_TARGET_SKYLAKE", - "SIMSIMD_TARGET_ICE", - "SIMSIMD_TARGET_GENOA", - "SIMSIMD_TARGET_SAPPHIRE", - ], - }; + .include(&numkong_include) + .define("USEARCH_USE_NUMKONG", "1") + .define("NK_DYNAMIC_DISPATCH", "1") + .define("NK_NATIVE_BF16", "0") + .define("NK_NATIVE_F16", "0"); + + // Link the NumKong static library compiled by the numkong crate. Cargo propagates + // the library search path via `links` metadata, but doesn't re-emit `-lnumkong` + // for downstream native code. Our C++ libusearch.a references NumKong symbols + // (nk_find_kernel_punned, nk_capabilities), so we must link explicitly. 
+ println!("cargo:rustc-link-lib=static=numkong"); } else { - build.define("USEARCH_USE_SIMSIMD", "0"); - flags_to_try = vec![]; + build.define("USEARCH_USE_NUMKONG", "0"); } - let target_os = std::env::var("CARGO_CFG_TARGET_OS").unwrap(); + let target_os = std::env::var("CARGO_CFG_TARGET_OS")?; // Conditional compilation depending on the target operating system. if target_os == "linux" || target_os == "android" { build @@ -89,33 +81,7 @@ fn main() { .define("_ALLOW_POINTER_TO_CONST_MISMATCH", None); } - let base_build = build.clone(); - - let mut pop_flag = None; - loop { - let mut sub_build = base_build.clone(); - for flag in &flags_to_try { - sub_build.define(flag, "1"); - } - let result = sub_build.try_compile("usearch"); - if result.is_err() { - if let Some(flag) = pop_flag { - println!( - "cargo:warning=Failed to compile after disabling {:?}, trying next configuration...", - flag - ); - } else if !flags_to_try.is_empty() { - print!("cargo:warning=Failed to compile with all SIMD backends..."); - } - - pop_flag = flags_to_try.pop(); - if pop_flag.is_none() { - result.unwrap(); - } - } else { - break; - } - } + build.try_compile("usearch")?; println!("cargo:rerun-if-changed=rust/lib.rs"); println!("cargo:rerun-if-changed=rust/lib.cpp"); @@ -123,4 +89,5 @@ fn main() { println!("cargo:rerun-if-changed=include/index_plugins.hpp"); println!("cargo:rerun-if-changed=include/index_dense.hpp"); println!("cargo:rerun-if-changed=include/usearch/index.hpp"); + Ok(()) } diff --git a/c/CMakeLists.txt b/c/CMakeLists.txt index dcdd39dbe..5e10c405d 100644 --- a/c/CMakeLists.txt +++ b/c/CMakeLists.txt @@ -1,9 +1,4 @@ - -# Define the source files conditionally set(USEARCH_LIB_SOURCES lib.cpp) -if (USEARCH_USE_SIMSIMD) - list(APPEND USEARCH_LIB_SOURCES ../simsimd/c/lib.c) -endif () if (USEARCH_BUILD_TEST_C) add_executable(test_c test.c ${USEARCH_LIB_SOURCES}) @@ -11,6 +6,10 @@ if (USEARCH_BUILD_TEST_C) include(CTest) enable_testing() add_test(NAME test_c COMMAND test_c) + + # 
Export the dynamic symbol table so `backtrace_symbols_fd` can resolve + # function names when the in-test crash handler fires. + set_target_properties(test_c PROPERTIES ENABLE_EXPORTS ON) endif () # This article discusses a better way to allow building either static or shared libraries: diff --git a/c/README.md b/c/README.md index 3dac28a14..c47e4a507 100644 --- a/c/README.md +++ b/c/README.md @@ -3,7 +3,7 @@ ## Installation USearch vector-search engine can be integrated into your project using CMake. -Alternatively, you can download one of the precompiled binaries from the [releases page](https://github.com/unum-cloud/usearch/releases). +Alternatively, you can download one of the precompiled binaries from the [releases page](https://github.com/unum-cloud/USearch/releases). ## Quickstart @@ -89,7 +89,7 @@ usearch_metadata_buffer(buffer, bytes, &opts, &error); ## Metrics -USearch comes pre-packaged with SimSIMD, bringing over 100 SIMD-accelerated distance kernels for x86 and ARM architectures. +USearch comes pre-packaged with NumKong, bringing over 100 SIMD-accelerated distance kernels for x86 and ARM architectures. That includes: - `usearch_metric_cos_k` - Cosine Similarity metric, defined as `Cos = 1 - sum(a[i] * b[i]) / (sqrt(sum(a[i]^2) * sqrt(sum(b[i]^2)))`. 
@@ -107,7 +107,7 @@ That includes: You can also define your own metrics by implementing the `usearch_metric_t` interface: ```c -simsimd_distance_t callback(void const* a, void const* b, void* state) { +usearch_distance_t callback(void const* a, void const* b, void* state) { // Your custom metric implementation here } @@ -183,15 +183,15 @@ Alternatively, you can benefit from faster thread-pools and priority queues for size_t threads = 0; size_t top_k = 10; size_t dataset_count = 1000, queries_count = 10; -simsimd_f16_t dataset[dataset_count][dimensions]; -simsimd_f16_t queries[queries_count][dimensions]; +nk_f16_t dataset[dataset_count][dimensions]; +nk_f16_t queries[queries_count][dimensions]; usearch_key_t resulting_keys[queries_count][top_k]; usearch_distance_t resulting_distances[queries_count][top_k]; usearch_exact_search( - &dataset[0][0], dataset_count, dimensions * sizeof(simsimd_f16_t), - &queries[0][0], queries_count, dimensions * sizeof(simsimd_f16_t), + &dataset[0][0], dataset_count, dimensions * sizeof(nk_f16_t), + &queries[0][0], queries_count, dimensions * sizeof(nk_f16_t), usearch_scalar_f16_k, top_k, threads, &resulting_keys[0][0], sizeof(usearch_key_t) * top_k, &resulting_distances[0][0], sizeof(usearch_distance_t) * top_k, diff --git a/c/lib.cpp b/c/lib.cpp index fa1b8a023..05f54bbba 100644 --- a/c/lib.cpp +++ b/c/lib.cpp @@ -56,11 +56,16 @@ usearch_metric_kind_t metric_kind_to_c(metric_kind_t kind) { } scalar_kind_t scalar_kind_to_cpp(usearch_scalar_kind_t kind) { switch (kind) { - case usearch_scalar_f32_k: return scalar_kind_t::f32_k; case usearch_scalar_f64_k: return scalar_kind_t::f64_k; - case usearch_scalar_f16_k: return scalar_kind_t::f16_k; + case usearch_scalar_f32_k: return scalar_kind_t::f32_k; case usearch_scalar_bf16_k: return scalar_kind_t::bf16_k; + case usearch_scalar_f16_k: return scalar_kind_t::f16_k; + case usearch_scalar_e5m2_k: return scalar_kind_t::e5m2_k; + case usearch_scalar_e4m3_k: return scalar_kind_t::e4m3_k; + case 
usearch_scalar_e3m2_k: return scalar_kind_t::e3m2_k; + case usearch_scalar_e2m3_k: return scalar_kind_t::e2m3_k; case usearch_scalar_i8_k: return scalar_kind_t::i8_k; + case usearch_scalar_u8_k: return scalar_kind_t::u8_k; case usearch_scalar_b1_k: return scalar_kind_t::b1x8_k; default: return scalar_kind_t::unknown_k; } @@ -68,11 +73,16 @@ scalar_kind_t scalar_kind_to_cpp(usearch_scalar_kind_t kind) { usearch_scalar_kind_t scalar_kind_to_c(scalar_kind_t kind) { switch (kind) { - case scalar_kind_t::f32_k: return usearch_scalar_f32_k; case scalar_kind_t::f64_k: return usearch_scalar_f64_k; - case scalar_kind_t::f16_k: return usearch_scalar_f16_k; + case scalar_kind_t::f32_k: return usearch_scalar_f32_k; case scalar_kind_t::bf16_k: return usearch_scalar_bf16_k; + case scalar_kind_t::f16_k: return usearch_scalar_f16_k; + case scalar_kind_t::e5m2_k: return usearch_scalar_e5m2_k; + case scalar_kind_t::e4m3_k: return usearch_scalar_e4m3_k; + case scalar_kind_t::e3m2_k: return usearch_scalar_e3m2_k; + case scalar_kind_t::e2m3_k: return usearch_scalar_e2m3_k; case scalar_kind_t::i8_k: return usearch_scalar_i8_k; + case scalar_kind_t::u8_k: return usearch_scalar_u8_k; case scalar_kind_t::b1x8_k: return usearch_scalar_b1_k; default: return usearch_scalar_unknown_k; } @@ -80,11 +90,12 @@ usearch_scalar_kind_t scalar_kind_to_c(scalar_kind_t kind) { add_result_t add_(index_dense_t* index, usearch_key_t key, void const* vector, scalar_kind_t kind) { switch (kind) { - case scalar_kind_t::f32_k: return index->add(key, (f32_t const*)vector); case scalar_kind_t::f64_k: return index->add(key, (f64_t const*)vector); - case scalar_kind_t::f16_k: return index->add(key, (f16_t const*)vector); + case scalar_kind_t::f32_k: return index->add(key, (f32_t const*)vector); case scalar_kind_t::bf16_k: return index->add(key, (bf16_t const*)vector); + case scalar_kind_t::f16_k: return index->add(key, (f16_t const*)vector); case scalar_kind_t::i8_k: return index->add(key, (i8_t const*)vector); + 
case scalar_kind_t::u8_k: return index->add(key, (u8_t const*)vector); case scalar_kind_t::b1x8_k: return index->add(key, (b1x8_t const*)vector); default: return add_result_t{}.failed("Unknown scalar kind!"); } @@ -92,11 +103,12 @@ add_result_t add_(index_dense_t* index, usearch_key_t key, void const* vector, s std::size_t get_(index_dense_t* index, usearch_key_t key, size_t count, void* vector, scalar_kind_t kind) { switch (kind) { - case scalar_kind_t::f32_k: return index->get(key, (f32_t*)vector, count); case scalar_kind_t::f64_k: return index->get(key, (f64_t*)vector, count); - case scalar_kind_t::f16_k: return index->get(key, (f16_t*)vector, count); + case scalar_kind_t::f32_k: return index->get(key, (f32_t*)vector, count); case scalar_kind_t::bf16_k: return index->get(key, (bf16_t*)vector, count); + case scalar_kind_t::f16_k: return index->get(key, (f16_t*)vector, count); case scalar_kind_t::i8_k: return index->get(key, (i8_t*)vector, count); + case scalar_kind_t::u8_k: return index->get(key, (u8_t*)vector, count); case scalar_kind_t::b1x8_k: return index->get(key, (b1x8_t*)vector, count); default: return search_result_t(*index).failed("Unknown scalar kind!"); } @@ -106,16 +118,18 @@ template search_result_t search_(index_dense_t* index, void const* vector, scalar_kind_t kind, size_t n, predicate_at&& predicate = predicate_at{}) { switch (kind) { - case scalar_kind_t::f32_k: - return index->filtered_search((f32_t const*)vector, n, std::forward(predicate)); case scalar_kind_t::f64_k: return index->filtered_search((f64_t const*)vector, n, std::forward(predicate)); - case scalar_kind_t::f16_k: - return index->filtered_search((f16_t const*)vector, n, std::forward(predicate)); + case scalar_kind_t::f32_k: + return index->filtered_search((f32_t const*)vector, n, std::forward(predicate)); case scalar_kind_t::bf16_k: return index->filtered_search((bf16_t const*)vector, n, std::forward(predicate)); + case scalar_kind_t::f16_k: + return index->filtered_search((f16_t 
const*)vector, n, std::forward(predicate)); case scalar_kind_t::i8_k: return index->filtered_search((i8_t const*)vector, n, std::forward(predicate)); + case scalar_kind_t::u8_k: + return index->filtered_search((u8_t const*)vector, n, std::forward(predicate)); case scalar_kind_t::b1x8_k: return index->filtered_search((b1x8_t const*)vector, n, std::forward(predicate)); default: return search_result_t(*index).failed("Unknown scalar kind!"); @@ -133,6 +147,10 @@ USEARCH_EXPORT char const* usearch_version(void) { return version; } +USEARCH_EXPORT char const* usearch_hardware_acceleration_compiled(void) { return hardware_acceleration_compiled(); } + +USEARCH_EXPORT char const* usearch_hardware_acceleration_available(void) { return hardware_acceleration_available(); } + USEARCH_EXPORT usearch_index_t usearch_init(usearch_init_options_t* options, usearch_error_t* error) { USEARCH_ASSERT(error && "Missing arguments"); @@ -413,7 +431,7 @@ USEARCH_EXPORT size_t usearch_search( USEARCH_EXPORT size_t usearch_filtered_search( // usearch_index_t index, // void const* query, usearch_scalar_kind_t query_kind, size_t results_limit, // - usearch_filtered_search_callback_t filter, void* filter_state, // + usearch_filtered_search_callback_t filter, void* filter_state, // usearch_key_t* found_keys, usearch_distance_t* found_distances, usearch_error_t* error) { USEARCH_ASSERT(index && query && filter && error && "Missing arguments"); diff --git a/c/test.c b/c/test.c index dd4a07f0a..361fee1c8 100644 --- a/c/test.c +++ b/c/test.c @@ -1,10 +1,91 @@ +/** + * @file test.c + * @author Ash Vardanian + * @brief Unit tests for the pure-C ABI of USearch (`usearch.h`). + * @date June 25, 2023 + * + * Exercises the lifecycle of `usearch_index_t` through the public C surface: + * index creation with every supported metric and scalar kind, `add` / `get` / + * `find` / `remove`, on-disk `save` / `load` / `view`, and error propagation + * via `usearch_error_t`. 
The harness is intentionally dependency-free so it + * can run in the same matrix as the C++ tests and on cross-compilation + * targets where only the C runtime is available. + * + * On startup we install a signal handler (see `install_crash_handlers`) that + * prints a native back-trace before re-raising, so CI logs pinpoint the + * faulting frame instead of stopping at a bare exit code. + */ #include -#include // `remove` +#include // `signal`, `raise`, `SIGSEGV` +#include // `remove` #include +#include // `memset` #include +/* Back-trace support for the C test harness. The `signal` API is standard C; + * the back-trace itself is taken via an OS-specific facility since C has no + * standard stack-introspection API. On Windows, `dbghelp.h` references types + * (e.g. `PSTR`) that are only defined after `windows.h`, so the two headers + * are separated by a blank line to keep clang-format from re-sorting them + * into a single alphabetized block. */ +#if defined(_WIN32) +#include + +#include +#pragma comment(lib, "Dbghelp.lib") +#elif defined(__unix__) || defined(__APPLE__) +#include +#include +#endif + #include "usearch.h" +static void usearch_write_backtrace(int signal_number) { + fprintf(stderr, "\n[usearch] Fatal signal %d. 
Back-trace:\n", signal_number); +#if defined(_WIN32) + enum { backtrace_depth_limit = 64 }; + void* backtrace_frames[backtrace_depth_limit]; + USHORT backtrace_depth = CaptureStackBackTrace(0, backtrace_depth_limit, backtrace_frames, NULL); + HANDLE current_process = GetCurrentProcess(); + SymInitialize(current_process, NULL, TRUE); + + unsigned char symbol_info_buffer[sizeof(SYMBOL_INFO) + 256 * sizeof(char)]; + SYMBOL_INFO* symbol_info = (SYMBOL_INFO*)symbol_info_buffer; + symbol_info->MaxNameLen = 255; + symbol_info->SizeOfStruct = sizeof(SYMBOL_INFO); + + for (USHORT frame_index = 0; frame_index < backtrace_depth; ++frame_index) { + if (SymFromAddr(current_process, (DWORD64)backtrace_frames[frame_index], 0, symbol_info)) + fprintf(stderr, " #%2u %s + 0x%llx\n", (unsigned)frame_index, symbol_info->Name, + (unsigned long long)((DWORD64)backtrace_frames[frame_index] - symbol_info->Address)); + else + fprintf(stderr, " #%2u %p\n", (unsigned)frame_index, backtrace_frames[frame_index]); + } +#elif defined(__unix__) || defined(__APPLE__) + enum { backtrace_depth_limit = 64 }; + void* backtrace_frames[backtrace_depth_limit]; + int backtrace_depth = backtrace(backtrace_frames, backtrace_depth_limit); + backtrace_symbols_fd(backtrace_frames, backtrace_depth, STDERR_FILENO); +#else + (void)signal_number; + fprintf(stderr, " \n"); +#endif + fflush(stderr); +} + +static void usearch_crash_handler(int signal_number) { + usearch_write_backtrace(signal_number); + /* Restore the default disposition and re-raise so the shell / CI sees the true exit status. 
*/ + signal(signal_number, SIG_DFL); + raise(signal_number); +} + +static void install_crash_handlers(void) { + int const fatal_signals[] = {SIGSEGV, SIGABRT, SIGILL, SIGFPE}; + for (unsigned signal_index = 0; signal_index < sizeof(fatal_signals) / sizeof(fatal_signals[0]); ++signal_index) + signal(fatal_signals[signal_index], &usearch_crash_handler); +} + void expect(bool must_be_true, char const* message) { if (must_be_true) return; @@ -368,6 +449,7 @@ void test_view(size_t const collection_size, size_t const dimensions) { } int main(int argc, char const* argv[]) { + install_crash_handlers(); printf("Running tests...\n"); printf("USearch version: %s\n", usearch_version()); diff --git a/c/usearch.h b/c/usearch.h index 7921c192c..49ec2a51e 100644 --- a/c/usearch.h +++ b/c/usearch.h @@ -53,12 +53,17 @@ USEARCH_EXPORT typedef enum usearch_metric_kind_t { USEARCH_EXPORT typedef enum usearch_scalar_kind_t { usearch_scalar_unknown_k = 0, - usearch_scalar_f32_k = 1, usearch_scalar_f64_k = 2, + usearch_scalar_f32_k = 1, + usearch_scalar_bf16_k = 6, usearch_scalar_f16_k = 3, + usearch_scalar_e5m2_k = 7, + usearch_scalar_e4m3_k = 8, + usearch_scalar_e3m2_k = 11, + usearch_scalar_e2m3_k = 10, usearch_scalar_i8_k = 4, + usearch_scalar_u8_k = 9, usearch_scalar_b1_k = 5, - usearch_scalar_bf16_k = 6, } usearch_scalar_kind_t; USEARCH_EXPORT typedef struct usearch_init_options_t { @@ -109,8 +114,6 @@ USEARCH_EXPORT typedef struct usearch_init_options_t { bool multi; } usearch_init_options_t; -extern int goFilteredSearchCallback(usearch_key_t, void*); - USEARCH_EXPORT typedef int (*usearch_filtered_search_callback_t)(usearch_key_t, void*); /** @@ -119,6 +122,18 @@ USEARCH_EXPORT typedef int (*usearch_filtered_search_callback_t)(usearch_key_t, */ USEARCH_EXPORT char const* usearch_version(void); +/** + * @brief Retrieves a list of hardware capabilities in this precompiled binary. + * @return A comma-separated string with names of CPU features. 
+ */ +USEARCH_EXPORT char const* usearch_hardware_acceleration_compiled(void); + +/** + * @brief Retrieves a list of hardware capabilities supported by the current machine. + * @return A comma-separated string with names of CPU features. + */ +USEARCH_EXPORT char const* usearch_hardware_acceleration_available(void); + /** * @brief Initializes a new instance of the index. * @param options Pointer to the `usearch_init_options_t` structure containing initialization options. @@ -395,7 +410,7 @@ USEARCH_EXPORT size_t usearch_search( // USEARCH_EXPORT size_t usearch_filtered_search( // usearch_index_t index, // void const* query_vector, usearch_scalar_kind_t query_kind, size_t count, // - usearch_filtered_search_callback_t filter, void* filter_state, // + usearch_filtered_search_callback_t filter, void* filter_state, // usearch_key_t* keys, usearch_distance_t* distances, usearch_error_t* error); /** @@ -484,6 +499,8 @@ USEARCH_EXPORT void usearch_exact_search( // */ USEARCH_EXPORT void usearch_clear(usearch_index_t index, usearch_error_t* error); +extern int goFilteredSearchCallback(usearch_key_t, void*); + #ifdef __cplusplus } #endif diff --git a/conanfile.py b/conanfile.py index b13fe4874..48f205991 100644 --- a/conanfile.py +++ b/conanfile.py @@ -10,7 +10,7 @@ class USearchConan(ConanFile): version = "2.24.0" license = "Apache-2.0" description = "Smaller & Faster Single-File Vector Search Engine from Unum" - homepage = "https://github.com/unum-cloud/usearch" + homepage = "https://github.com/unum-cloud/USearch" topics = ("search", "vector", "simd") settings = "os", "arch", "compiler", "build_type" url = "https://github.com/conan-io/conan-center-index" diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 8301de6cb..7cea50f1e 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -7,6 +7,10 @@ if (USEARCH_BUILD_TEST_CPP) target_include_directories(test_cpp PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/../stringzilla/include) + # Export the dynamic symbol table so 
`backtrace_symbols` / `std::stacktrace` + # can resolve function names when the in-test crash handler fires. + set_target_properties(test_cpp PROPERTIES ENABLE_EXPORTS ON) + if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") target_compile_options(test_cpp PRIVATE -Wno-vla -Wno-unused-function -Wno-cast-function-type) endif () diff --git a/cpp/README.md b/cpp/README.md index 74c692507..5b8f18d3e 100644 --- a/cpp/README.md +++ b/cpp/README.md @@ -6,7 +6,7 @@ To use in a C++ project, copy the `include/usearch/*` headers into your project. Alternatively, fetch it with CMake: ```cmake -FetchContent_Declare(usearch GIT_REPOSITORY https://github.com/unum-cloud/usearch.git) +FetchContent_Declare(usearch GIT_REPOSITORY https://github.com/unum-cloud/USearch.git) FetchContent_MakeAvailable(usearch) ``` @@ -43,9 +43,9 @@ int main(int argc, char **argv) { Here we: -- define a metric of kind [`metric_kind_t::l2sq_k`](https://unum-cloud.github.io/usearch/cpp/reference.html#_CPPv413metric_kind_t), -- to be applied to [`scalar_kind_t::f32_k`](https://unum-cloud.github.io/usearch/cpp/reference.html#_CPPv413scalar_kind_t) floating-point vectors, -- instantiate an [`index_dense_t`](https://unum-cloud.github.io/usearch/cpp/reference.html#_CPPv4I00EN4unum7usearch14index_dense_gtE) index. +- define a metric of kind [`metric_kind_t::l2sq_k`](https://unum-cloud.github.io/USearch/cpp/reference.html#_CPPv413metric_kind_t), +- to be applied to [`scalar_kind_t::f32_k`](https://unum-cloud.github.io/USearch/cpp/reference.html#_CPPv413scalar_kind_t) floating-point vectors, +- instantiate an [`index_dense_t`](https://unum-cloud.github.io/USearch/cpp/reference.html#_CPPv4I00EN4unum7usearch14index_dense_gtE) index. The `add` is thread-safe for concurrent index construction. It also has an overload for different vector types, casting them under the hood. 
@@ -174,7 +174,7 @@ In its high-level interface, USearch supports a variety of metrics, including th - `metric_haversine_gt` for "Haversine" or "Great Circle" distance between coordinates used in GIS applications. - `metric_divergence_gt` for the "Jensen Shannon" similarity between probability distributions. -In reality, for most common types, one of the [SimSIMD](https://github.com/ashvardanian/SimSIMD) backends will be triggered, providing hardware-acceleration for most common CPUs. +In reality, for most common types, one of the [NumKong](https://github.com/ashvardanian/numkong) backends will be triggered, providing hardware-acceleration for most common CPUs. If you need a different metric, you can implement it yourself and wrap it into a `metric_punned_t`, which is our alternative to the `std::function`. Unlike the `std::function`, it is a trivial type, which is important for performance. diff --git a/cpp/bench.cpp b/cpp/bench.cpp index 740616aad..c04edab2c 100644 --- a/cpp/bench.cpp +++ b/cpp/bench.cpp @@ -26,16 +26,18 @@ #include // `stat` -#include #include #include -#include // `std::cerr` -#include // `std::iota` -#include // `std::invalid_argument` -#include // `std::to_string` -#include // `std::thread::hardware_concurrency()` -#include // `std::monostate` -#include + +#include // ? 
+#include // `std::cerr` +#include // `std::iota` +#include // `std::invalid_argument` +#include // `std::to_string` +#include // `std::thread::hardware_concurrency()` +#include // `std::unordered_map` +#include // `std::monostate` +#include // `std::vector` #include // Command Line Interface #if USEARCH_USE_OPENMP @@ -112,9 +114,10 @@ struct alignas(32) persisted_matrix_gt { if (fstat(file_descriptor, &stat_vectors) == -1) throw std::invalid_argument("Couldn't obtain file stats"); raw_length = stat_vectors.st_size; - raw_handle = (std::uint8_t*)mmap(NULL, raw_length, PROT_READ, MAP_PRIVATE, file_descriptor, 0); - if (raw_handle == nullptr) + auto* result = mmap(NULL, raw_length, PROT_READ, MAP_PRIVATE, file_descriptor, 0); + if (result == MAP_FAILED) throw std::invalid_argument("Couldn't memory-map the file"); + raw_handle = (std::uint8_t*)result; std::memcpy(&rows, raw_handle, sizeof(rows)); std::memcpy(&cols, raw_handle + sizeof(rows), sizeof(cols)); scalars = (scalar_t*)(raw_handle + sizeof(rows) + sizeof(cols)); @@ -201,14 +204,20 @@ struct persisted_dataset_gt { for (std::size_t i = 0; i < ids_matrix.rows; ++i) vector_ids_[i] = static_cast(*ids_matrix.row(i)); } + + // When custom IDs are loaded and self-search is active, populate + // neighborhoods_iota_ with the actual IDs so recall comparison works. + if (has_vector_ids() && !queries_.scalars && !neighborhoods_.scalars) { + for (std::size_t i = 0; i < neighborhoods_iota_.size(); ++i) + neighborhoods_iota_[i] = static_cast(vector_ids_[i]); + } } bool search_itself() const noexcept { return vectors_count() && !queries_.rows; } bool has_vector_ids() const noexcept { return !vector_ids_.empty(); } default_key_t vector_id(std::size_t i) const noexcept { - return has_vector_ids() ? vector_ids_[i + vectors_to_skip_] - : static_cast(i + vectors_to_skip_); + return has_vector_ids() ? 
vector_ids_[i + vectors_to_skip_] : static_cast(i + vectors_to_skip_); } std::size_t dimensions() const noexcept { return vectors_.cols; } @@ -224,9 +233,7 @@ struct persisted_dataset_gt { std::size_t vectors_count() const noexcept { return vectors_to_take_ ? vectors_to_take_ : (vectors_.rows - vectors_to_skip_); } - matrix_slice_gt vectors_view() const noexcept { - return {vector(vectors_to_skip_), vectors_count(), dimensions()}; - } + matrix_slice_gt vectors_view() const noexcept { return {vector(0), vectors_count(), dimensions()}; } }; template // @@ -288,7 +295,7 @@ struct running_stats_printer_t { std::size_t count = progress.load(); timestamp_t time = std::chrono::high_resolution_clock::now(); std::size_t duration = std::chrono::duration_cast(time - start_time).count(); - float vectors_per_second = count * 1e9 / duration; + float vectors_per_second = static_cast(count * 1e9 / duration); std::printf("\r\33[2K100 %% completed, %.0f vectors/s\n", vectors_per_second); } @@ -314,7 +321,7 @@ struct running_stats_printer_t { timestamp_t time_new = std::chrono::high_resolution_clock::now(); std::size_t duration = std::chrono::duration_cast(time_new - last_printed_time).count(); - float vectors_per_second = count_new * 1e9 / duration; + float vectors_per_second = static_cast(count_new * 1e9 / duration); std::printf("\r%3.3f%% [%.*s%*s] %.0f vectors/s, finished %zu/%zu", percentage * 100.f, left_pad, bars_k, right_pad, "", vectors_per_second, progress, total); @@ -377,9 +384,8 @@ void search_many( // } template // -static void single_shot(dataset_at& dataset, index_at& index, bool construct = true) { +static void single_shot(dataset_at& dataset, index_at& index, bool construct = true, bool bench_join = false) { using distance_t = typename index_at::distance_t; - constexpr default_key_t missing_key = std::numeric_limits::max(); std::printf("\n"); std::printf("------------\n"); @@ -391,6 +397,9 @@ static void single_shot(dataset_at& dataset, index_at& index, bool 
construct = t index_many(index, dataset.vectors_count(), ids.data(), dataset.vector(0), dataset.dimensions()); } + std::size_t mem = index.memory_usage(); + std::printf("Memory usage: %.2f GB\n", mem / (1024.0 * 1024.0 * 1024.0)); + // Perform search, evaluate speed std::vector found_neighbors(dataset.queries_count() * dataset.neighborhood_size()); std::vector found_distances(dataset.queries_count() * dataset.neighborhood_size()); @@ -409,40 +418,38 @@ static void single_shot(dataset_at& dataset, index_at& index, bool construct = t std::printf("Recall@1 %.2f %%\n", recall_at_1 * 100.f / dataset.queries_count()); std::printf("Recall %.2f %%\n", recall_full * 100.f / dataset.queries_count()); - // Perform joins - std::vector man_to_woman(dataset.vectors_count()); - std::vector woman_to_man(dataset.vectors_count()); - std::size_t join_attempts = 0; - { + if (!bench_join) { + // Perform joins using maps to support non-contiguous IDs + std::unordered_map man_to_woman; + std::unordered_map woman_to_man; + std::size_t join_attempts = 0; + index_at& men = index; index_at women = index.copy(); - std::fill(man_to_woman.begin(), man_to_woman.end(), missing_key); - std::fill(woman_to_man.begin(), woman_to_man.end(), missing_key); - { - executor_default_t executor(index.limits().threads()); - running_stats_printer_t printer{1, "Join"}; - join_result_t result = join( // - men, women, index_join_config_t{executor.size()}, // - man_to_woman.data(), woman_to_man.data(), // - executor, [&](std::size_t progress, std::size_t total) { - if (progress % 1000 == 0) - printer.print(progress, total); - return true; - }); - // Refresh once again to show 100% completion - printer.print(); - join_attempts = result.visited_members; - } - } - // Evaluate join quality - std::size_t recall_join = 0, unmatched_count = 0; - for (std::size_t i = 0; i != index.size(); ++i) { - recall_join += man_to_woman[i] == static_cast(i); - unmatched_count += man_to_woman[i] == missing_key; + + executor_default_t 
executor(index.limits().threads()); + running_stats_printer_t printer{1, "Join"}; + join_result_t result = join( // + men, women, index_join_config_t{executor.size()}, // + man_to_woman, woman_to_man, // + executor, [&](std::size_t progress, std::size_t total) { + if (progress % 1000 == 0) + printer.print(progress, total); + return true; + }); + // Refresh once again to show 100% completion + printer.print(); + join_attempts = result.visited_members; + + // Evaluate join quality + std::size_t recall_join = 0; + for (auto const& [man, woman] : man_to_woman) + recall_join += (man == woman); + std::size_t unmatched_count = dataset.vectors_count() - man_to_woman.size(); + std::printf("Recall Joins %.2f %%\n", recall_join * 100.f / index.size()); + std::printf("Unmatched %.2f %% (%zu items)\n", unmatched_count * 100.f / index.size(), unmatched_count); + std::printf("Proposals %.2f / man (%zu total)\n", join_attempts * 1.f / index.size(), join_attempts); } - std::printf("Recall Joins %.2f %%\n", recall_join * 100.f / index.size()); - std::printf("Unmatched %.2f %% (%zu items)\n", unmatched_count * 100.f / index.size(), unmatched_count); - std::printf("Proposals %.2f / man (%zu total)\n", join_attempts * 1.f / index.size(), join_attempts); std::printf("------------\n"); std::printf("\n"); @@ -473,7 +480,7 @@ void handler(int sig) { name = ""; } DWORD bytes_written; - WriteFile(STDERR_FILENO, name, std::strlen(name), &bytes_written, NULL); + WriteFile(STDERR_FILENO, name, static_cast(std::strlen(name)), &bytes_written, NULL); WriteFile(STDERR_FILENO, "\n", 1, &bytes_written, NULL); } free(symbol); @@ -508,49 +515,24 @@ struct args_t { bool help = false; bool big = false; + bool join = false; + bool view = false; - bool quantize_bf16 = false; - bool quantize_f16 = false; - bool quantize_i8 = false; - bool quantize_b1 = false; - - bool metric_ip = false; - bool metric_l2 = false; - bool metric_cos = false; - bool metric_haversine = false; - bool metric_divergence = false; - 
bool metric_hamming = false; - bool metric_tanimoto = false; - bool metric_sorensen = false; + std::string dtype_str = "f32"; + std::string metric_str = "ip"; metric_kind_t metric() const noexcept { - if (metric_l2) - return metric_kind_t::l2sq_k; - if (metric_cos) - return metric_kind_t::cos_k; - if (metric_haversine) - return metric_kind_t::haversine_k; - if (metric_divergence) - return metric_kind_t::divergence_k; - if (metric_hamming) - return metric_kind_t::hamming_k; - if (metric_tanimoto) - return metric_kind_t::tanimoto_k; - if (metric_sorensen) - return metric_kind_t::sorensen_k; - return metric_kind_t::ip_k; + auto parsed = metric_from_name(metric_str.c_str(), metric_str.size()); + if (!parsed) + return metric_kind_t::ip_k; + return parsed.result; } scalar_kind_t quantization() const noexcept { - if (quantize_bf16) - return scalar_kind_t::bf16_k; - if (quantize_f16) - return scalar_kind_t::f16_k; - if (quantize_i8) - return scalar_kind_t::i8_k; - if (quantize_b1) - return scalar_kind_t::b1x8_k; - return scalar_kind_t::f32_k; + auto parsed = scalar_kind_from_name(dtype_str.c_str(), dtype_str.size()); + if (!parsed) + return scalar_kind_t::f32_k; + return parsed.result; } }; @@ -569,14 +551,16 @@ void run_punned(dataset_at& dataset, args_t const& args, index_dense_config_t co std::printf("-- Hardware acceleration: %s\n", index.metric().isa_name()); std::printf("Will benchmark in-memory\n"); - single_shot(dataset, index, true); + single_shot(dataset, index, true, args.join); index.save(args.path_output.c_str()); + if (!args.view) + return; std::printf("Will benchmark an on-disk view\n"); index_at index_view = index.fork(); index_view.view(args.path_output.c_str()); - single_shot(dataset, index_view, false); + single_shot(dataset, index_view, false, args.join); } template // @@ -586,14 +570,16 @@ void run_typed(dataset_at& dataset, args_t const& args, index_config_t config, i index.reserve(limits); std::printf("Will benchmark in-memory\n"); - 
single_shot(dataset, index, true); + single_shot(dataset, index, true, args.join); index.save(args.path_output.c_str()); + if (!args.view) + return; std::printf("Will benchmark an on-disk view\n"); index_at index_view = index.fork(); index_view.view(args.path_output.c_str()); - single_shot(dataset, index_view, false); + single_shot(dataset, index_view, false, args.join); } template void bench_with_args(args_t const& args) { @@ -636,9 +622,9 @@ int main(int argc, char** argv) { auto args = args_t{}; auto cli = ( // (option("--vectors") & value("path", args.path_vectors)) - .doc(".[fhbd]bin, .i8bin, .f32bin file path to construct the index"), + .doc(".[fhbd]bin, .i8bin, .u8bin, .f32bin file path to construct the index"), (option("--queries") & value("path", args.path_queries)) - .doc(".[fhbd]bin, .i8bin, .f32bin file path to query the index"), + .doc(".[fhbd]bin, .i8bin, .u8bin, .f32bin file path to query the index"), (option("--neighbors") & value("path", args.path_neighbors)).doc(".ibin, .i32bin file path with ground truth"), (option("--ids") & value("path", args.path_ids)).doc(".i32bin file path with vector IDs (optional)"), (option("-o", "--output") & value("path", args.path_output)).doc(".usearch output file path"), @@ -649,20 +635,13 @@ int main(int argc, char** argv) { (option("--expansion-search") & value("integer", args.expansion_search)).doc("Affects search depth"), (option("--rows-skip") & value("integer", args.vectors_to_skip)).doc("Number of vectors to skip"), (option("--rows-take") & value("integer", args.vectors_to_take)).doc("Number of vectors to take"), - ( // - option("-bf16", "--bf16quant").set(args.quantize_bf16).doc("Enable `bf16_t` quantization") | - option("-f16", "--f16quant").set(args.quantize_f16).doc("Enable `f16_t` quantization") | - option("-i8", "--i8quant").set(args.quantize_i8).doc("Enable `i8_t` quantization") | - option("-b1", "--b1quant").set(args.quantize_b1).doc("Enable `b1x8_t` quantization")), - ( // - 
option("--ip").set(args.metric_ip).doc("Choose Inner Product metric") | - option("--l2sq").set(args.metric_l2).doc("Choose L2 Euclidean metric") | - option("--cos").set(args.metric_cos).doc("Choose Angular metric") | - option("--hamming").set(args.metric_hamming).doc("Choose Hamming metric") | - option("--tanimoto").set(args.metric_tanimoto).doc("Choose Tanimoto metric") | - option("--sorensen").set(args.metric_sorensen).doc("Choose Sorensen metric") | - option("--haversine").set(args.metric_haversine).doc("Choose Haversine metric")), - option("-h", "--help").set(args.help).doc("Print this help information on this tool and exit")); + (option("--dtype") & value("type", args.dtype_str)) + .doc("Quantization type: f64, f32, bf16, f16, e5m2, e4m3, e3m2, e2m3, i8, u8, b1"), + (option("--metric") & value("name", args.metric_str)) + .doc("Distance metric: ip, l2sq, cos, hamming, tanimoto, sorensen, haversine"), + option("-h", "--help").set(args.help).doc("Print this help information on this tool and exit"), + option("--join").set(args.join).doc("Also benchmark joins"), + option("--view").set(args.view).doc("Also benchmark on-disk view")); if (!parse(argc, argv, cli)) { std::cerr << make_man_page(cli, argv[0]); @@ -678,10 +657,12 @@ int main(int argc, char** argv) { // to better estimate statistics between tasks batches, without having to recreate // the threads. 
omp_set_dynamic(true); - omp_set_num_threads(args.threads); + omp_set_num_threads(static_cast(args.threads)); std::printf("- OpenMP threads: %d\n", omp_get_max_threads()); #endif + std::printf("- Hardware acceleration compiled: %s\n", hardware_acceleration_compiled()); + std::printf("- Hardware acceleration available: %s\n", hardware_acceleration_available()); std::printf("- Dataset: \n"); std::printf("-- Base vectors path: %s\n", args.path_vectors.c_str()); std::printf("-- Query vectors path: %s\n", args.path_queries.c_str()); @@ -693,18 +674,18 @@ int main(int argc, char** argv) { return stack.find(needle, stack.size() - needle.size()) != std::string_view::npos; }; - if (ends_with(args.path_vectors, ".fbin")) - bench_with_args(args); - else if (ends_with(args.path_vectors, ".dbin")) + if (ends_with(args.path_vectors, ".dbin")) bench_with_args(args); + else if (ends_with(args.path_vectors, ".fbin") || ends_with(args.path_vectors, ".f32bin")) + bench_with_args(args); else if (ends_with(args.path_vectors, ".hbin")) bench_with_args(args); - else if (ends_with(args.path_vectors, ".bbin")) - bench_with_args(args); else if (ends_with(args.path_vectors, ".i8bin")) bench_with_args(args); - else if (ends_with(args.path_vectors, ".f32bin")) - bench_with_args(args); + else if (ends_with(args.path_vectors, ".u8bin")) + bench_with_args(args); + else if (ends_with(args.path_vectors, ".bbin")) + bench_with_args(args); else throw std::runtime_error("Unknown input file path"); diff --git a/cpp/test.cpp b/cpp/test.cpp index a125f05ad..91cff0b05 100644 --- a/cpp/test.cpp +++ b/cpp/test.cpp @@ -12,14 +12,45 @@ * - 128-bit `uuid_t` keys and `enum slot64_t : std::uint64_t` make most sense for * for database users, implementing portable, concurrent systems. */ +#include // `assert` +#include // `std::abs` +#include // `std::signal`, `SIGSEGV`, ... 
+#include // `std::fprintf` +#include // `std::_Exit` + #include // `std::shuffle` -#include // `assert` -#include // `std::abs` #include // `std::default_random_engine` #include // `std::terminate` #include // `std::unordered_map` #include // `std::vector` +// Back-trace support. Prefer the C++23 `` library when the +// toolchain + stdlib expose it (`__cpp_lib_stacktrace`); otherwise fall back +// to the OS-native facility so that unit-test crashes in CI log something +// useful beyond a bare exit code. +#if defined(__has_include) +#if __has_include() +#include +#endif +#endif +#if defined(__cpp_lib_stacktrace) && __cpp_lib_stacktrace >= 202011L +#define USEARCH_HAS_STD_STACKTRACE 1 +#else +#define USEARCH_HAS_STD_STACKTRACE 0 +#if defined(_WIN32) +// `windows.h` must precede `dbghelp.h` โ€” the latter uses `PSTR` and friends +// that are only defined after `windows.h`. The blank line keeps clang-format +// from re-sorting the two headers into a single alphabetized block. +#include + +#include +#pragma comment(lib, "Dbghelp.lib") +#else +#include +#include +#endif +#endif + #define SZ_USE_X86_AVX512 0 // Sanitizers hate AVX512 #include // Levenshtein distance implementation @@ -54,7 +85,7 @@ void __expect_eq(value_at a, value_at b, char const* file, int line, char const* enum slot32_t : std::uint32_t {}; template <> struct unum::usearch::hash_gt : public unum::usearch::hash_gt {}; template <> struct unum::usearch::default_free_value_gt { - static slot32_t value() noexcept { return static_cast(std::numeric_limits::max()); } + static slot32_t value() noexcept { return static_cast((std::numeric_limits::max)()); } }; /* @@ -166,8 +197,8 @@ void test_uint40() { } // Test min and max functions - expect_eq(uint40_t::min(), uint40_t(0u)); - expect_eq(uint40_t::max(), uint40_t(max_uint40_k)); + expect_eq((uint40_t::min)(), uint40_t(0u)); + expect_eq((uint40_t::max)(), uint40_t(max_uint40_k)); // Test copy and move semantics for (std::uint64_t input_u64 : test_numbers) { @@ 
-664,7 +695,7 @@ void test_cosine(std::size_t collection_size, std::size_t dimensions) { vector_of_vectors_t vector_of_vectors(collection_size); for (auto& vector : vector_of_vectors) { vector.resize(dimensions); - std::generate(vector.begin(), vector.end(), [=] { return float(std::rand()) / float(INT_MAX); }); + std::generate(vector.begin(), vector.end(), [=] { return float(std::rand()) / float(RAND_MAX); }); } struct metric_t { @@ -1127,20 +1158,20 @@ void test_filtered_search() { { auto predicate = [](index_dense_t::key_t key) { return key != 0; }; auto results = index.filtered_search(vector_of_vectors[0].data(), 10, predicate); - expect_eq(10, results.size()); // ! Should not contain 0 + expect_eq(10u, results.size()); // ! Should not contain 0 for (std::size_t i = 0; i != results.size(); ++i) expect(0 != results[i].member.key); } { auto predicate = [](index_dense_t::key_t) { return false; }; auto results = index.filtered_search(vector_of_vectors[0].data(), 10, predicate); - expect_eq(0, results.size()); // ! Should not contain 0 + expect_eq(0u, results.size()); // ! Should not contain 0 } { auto predicate = [](index_dense_t::key_t key) { return key == 10; }; auto results = index.filtered_search(vector_of_vectors[0].data(), 10, predicate); - expect_eq(1, results.size()); // ! Should not contain 0 - expect_eq(10, results[0].member.key); + expect_eq(1u, results.size()); // ! Should not contain 0 + expect_eq(index_dense_t::key_t(10), results[0].member.key); } } @@ -1179,13 +1210,73 @@ void test_isolate() { } } +static void usearch_write_backtrace(int signal_number) { + std::fprintf(stderr, "\n[usearch] Fatal signal %d. Back-trace:\n", signal_number); +#if USEARCH_HAS_STD_STACKTRACE + // C++23 `std::stacktrace` covers every platform the library can reach. 
+ auto const current_trace = std::stacktrace::current(); + std::size_t frame_index = 0; + for (auto const& frame : current_trace) { + std::fprintf(stderr, " #%2zu %s\n", frame_index, std::to_string(frame).c_str()); + ++frame_index; + } +#elif defined(_WIN32) + // Fallback for MSVC stdlibs without ``: DbgHelp API. + constexpr USHORT backtrace_depth_limit = 64; + void* backtrace_frames[backtrace_depth_limit]; + USHORT backtrace_depth = CaptureStackBackTrace(0, backtrace_depth_limit, backtrace_frames, nullptr); + HANDLE current_process = GetCurrentProcess(); + SymInitialize(current_process, nullptr, TRUE); + + unsigned char symbol_info_buffer[sizeof(SYMBOL_INFO) + 256 * sizeof(char)]; + SYMBOL_INFO* symbol_info = reinterpret_cast(symbol_info_buffer); + symbol_info->MaxNameLen = 255; + symbol_info->SizeOfStruct = sizeof(SYMBOL_INFO); + + for (USHORT frame_index = 0; frame_index < backtrace_depth; ++frame_index) { + if (SymFromAddr(current_process, reinterpret_cast(backtrace_frames[frame_index]), 0, symbol_info)) + std::fprintf(stderr, " #%2u %s + 0x%llx\n", static_cast(frame_index), symbol_info->Name, + static_cast(reinterpret_cast(backtrace_frames[frame_index]) - + symbol_info->Address)); + else + std::fprintf(stderr, " #%2u %p\n", static_cast(frame_index), backtrace_frames[frame_index]); + } +#else + // Fallback for POSIX stdlibs without ``: ``. + constexpr int backtrace_depth_limit = 64; + void* backtrace_frames[backtrace_depth_limit]; + int const backtrace_depth = backtrace(backtrace_frames, backtrace_depth_limit); + backtrace_symbols_fd(backtrace_frames, backtrace_depth, STDERR_FILENO); +#endif + std::fflush(stderr); +} + +static void usearch_crash_handler(int signal_number) { + usearch_write_backtrace(signal_number); + // Restore the default disposition and re-raise so the shell / CI sees the true exit status. 
+ std::signal(signal_number, SIG_DFL); + std::raise(signal_number); +} + +static void install_crash_handlers() { + int const fatal_signals[] = {SIGSEGV, SIGABRT, SIGILL, SIGFPE}; + for (int signal_number : fatal_signals) + std::signal(signal_number, &usearch_crash_handler); +} + int main(int, char**) { - test_uint40(); - test_cosine(10, 10); + install_crash_handlers(); + + std::printf("Hardware acceleration compiled: %s\n", hardware_acceleration_compiled()); + std::printf("Hardware acceleration available: %s\n", hardware_acceleration_available()); // Non-default floating-point types may result in many compilation & rounding issues. - test_cosine(10, 10); + test_uint40(); + test_cosine(10, 10); test_cosine(10, 10); + test_cosine(10, 10); + test_cosine(10, 10); + test_cosine(10, 10); // Test plugins, like K-Means clustering. { @@ -1195,7 +1286,7 @@ int main(int, char**) { std::vector vectors(vectors_count * dimensions), centroids(centroids_count * dimensions); matrix_slice_gt vectors_slice(vectors.data(), dimensions, vectors_count); matrix_slice_gt centroids_slice(centroids.data(), dimensions, centroids_count); - std::generate(vectors.begin(), vectors.end(), [] { return float(std::rand()) / float(INT_MAX); }); + std::generate(vectors.begin(), vectors.end(), [] { return float(std::rand()) / float(RAND_MAX); }); std::vector assignments(vectors_count); std::vector distances(vectors_count); auto clustering_result = clustering(vectors_slice, centroids_slice, {assignments.data(), assignments.size()}, @@ -1209,9 +1300,11 @@ int main(int, char**) { for (std::size_t dataset_count : {10, 100}) for (std::size_t queries_count : {1, 10}) for (std::size_t wanted_count : {1, 5}) { - test_exact_search(dataset_count, queries_count, wanted_count); - test_exact_search(dataset_count, queries_count, wanted_count); + test_exact_search(dataset_count, queries_count, wanted_count); test_exact_search(dataset_count, queries_count, wanted_count); + test_exact_search(dataset_count, 
queries_count, wanted_count); + test_exact_search(dataset_count, queries_count, wanted_count); + test_exact_search(dataset_count, queries_count, wanted_count); } // Make sure the initializers and the algorithms can work with inadequately small values. @@ -1235,12 +1328,16 @@ int main(int, char**) { std::printf("Testing common cases\n"); for (std::size_t collection_size : {10, 500}) for (std::size_t dimensions : {97, 256}) { - std::printf("- Indexing %zu vectors with cos: \n", collection_size); - test_cosine(collection_size, dimensions); - std::printf("- Indexing %zu vectors with cos: \n", collection_size); - test_cosine(collection_size, dimensions); + std::printf("- Indexing %zu vectors with cos: \n", collection_size); + test_cosine(collection_size, dimensions); + std::printf("- Indexing %zu vectors with cos: \n", collection_size); + test_cosine(collection_size, dimensions); std::printf("- Indexing %zu vectors with cos: \n", collection_size); test_cosine(collection_size, dimensions); + std::printf("- Indexing %zu vectors with cos: \n", collection_size); + test_cosine(collection_size, dimensions); + std::printf("- Indexing %zu vectors with cos: \n", collection_size); + test_cosine(collection_size, dimensions); } // Test with binary vectors diff --git a/csharp/nuget/nuget-package.props b/csharp/nuget/nuget-package.props index 296eaa047..5776741ed 100644 --- a/csharp/nuget/nuget-package.props +++ b/csharp/nuget/nuget-package.props @@ -9,8 +9,8 @@ Cloud.Unum.USearch LICENSE - https://github.com/unum-cloud/usearch - https://github.com/unum-cloud/usearch + https://github.com/unum-cloud/USearch + https://github.com/unum-cloud/USearch true unum.png diff --git a/csharp/src/Cloud.Unum.USearch/NativeMethods.cs b/csharp/src/Cloud.Unum.USearch/NativeMethods.cs index 1ae42b206..83bca522b 100644 --- a/csharp/src/Cloud.Unum.USearch/NativeMethods.cs +++ b/csharp/src/Cloud.Unum.USearch/NativeMethods.cs @@ -15,6 +15,15 @@ internal static class NativeMethods { private const string 
LibraryName = "libusearch_c"; + [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl)] + public static extern void_ptr_t usearch_version(); + + [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl)] + public static extern void_ptr_t usearch_hardware_acceleration_compiled(); + + [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl)] + public static extern void_ptr_t usearch_hardware_acceleration_available(); + [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl)] public static extern usearch_index_t usearch_init(ref IndexOptions options, out usearch_error_t error); @@ -89,4 +98,7 @@ out usearch_error_t error [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl)] public static extern size_t usearch_rename(usearch_index_t index, usearch_key_t key_from, usearch_key_t key_to, out usearch_error_t error); + + [DllImport(LibraryName, CallingConvention = CallingConvention.Cdecl)] + public static extern void_ptr_t usearch_hardware_acceleration(usearch_index_t index, out usearch_error_t error); } diff --git a/csharp/src/Cloud.Unum.USearch/USearchIndex.cs b/csharp/src/Cloud.Unum.USearch/USearchIndex.cs index 22d3ab671..007a596af 100644 --- a/csharp/src/Cloud.Unum.USearch/USearchIndex.cs +++ b/csharp/src/Cloud.Unum.USearch/USearchIndex.cs @@ -719,8 +719,51 @@ protected virtual void Dispose(bool disposing) } + ///

+ /// Returns the SIMD capability used by this index. + /// + public string HardwareAcceleration() + { + var ptr = NativeMethods.usearch_hardware_acceleration(_index, out var error); + HandleError(error); + return Marshal.PtrToStringAnsi(ptr) ?? "serial"; + } + /// /// Destructor for the USearchIndex class. /// ~USearchIndex() => this.Dispose(false); } + +/// +/// System-wide hardware capability queries โ€” not tied to any index instance. +/// +public static class USearchCapabilities +{ + /// + /// Returns the USearch library version string. + /// + public static string Version() + { + var ptr = NativeMethods.usearch_version(); + return Marshal.PtrToStringAnsi(ptr) ?? "unknown"; + } + + /// + /// Returns a comma-separated list of ISAs compiled into this binary. + /// + public static string HardwareAccelerationCompiled() + { + var ptr = NativeMethods.usearch_hardware_acceleration_compiled(); + return Marshal.PtrToStringAnsi(ptr) ?? "serial"; + } + + /// + /// Returns a comma-separated list of ISAs available at runtime. + /// + public static string HardwareAccelerationAvailable() + { + var ptr = NativeMethods.usearch_hardware_acceleration_available(); + return Marshal.PtrToStringAnsi(ptr) ?? "serial"; + } +} diff --git a/csharp/src/Cloud.Unum.USearch/USearchTypes.cs b/csharp/src/Cloud.Unum.USearch/USearchTypes.cs index f66c4a70c..76b0d5f0d 100644 --- a/csharp/src/Cloud.Unum.USearch/USearchTypes.cs +++ b/csharp/src/Cloud.Unum.USearch/USearchTypes.cs @@ -74,26 +74,56 @@ public enum ScalarKind : uint /// Unknown = 0, + /// + /// 64-bit floating point. + /// + Float64 = 2, + /// /// 32-bit floating point. /// Float32 = 1, /// - /// 64-bit floating point. + /// Brain floating point (16-bit). /// - Float64 = 2, + BFloat16 = 6, /// /// 16-bit floating point. /// Float16 = 3, + /// + /// FP8 E5M2 (1 sign + 5 exponent + 2 mantissa). + /// + E5M2 = 7, + + /// + /// FP8 E4M3 (1 sign + 4 exponent + 3 mantissa). 
+ /// + E4M3 = 8, + + /// + /// FP6 E3M2 (1 sign + 3 exponent + 2 mantissa). + /// + E3M2 = 11, + + /// + /// FP6 E2M3 (1 sign + 2 exponent + 3 mantissa). + /// + E2M3 = 10, + /// /// 8-bit integer. /// Int8 = 4, + /// + /// 8-bit unsigned integer. + /// + U8 = 9, + /// /// 1-bit binary. /// diff --git a/docs/_static/custom.js b/docs/_static/custom.js index 9c97a22d7..926b8243c 100644 --- a/docs/_static/custom.js +++ b/docs/_static/custom.js @@ -1,5 +1,5 @@ $(document).ready(function () { - const github_logo = ` + const github_logo = ` ` diff --git a/fp16 b/fp16 deleted file mode 160000 index 0a92994d7..000000000 --- a/fp16 +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 0a92994d729ff76a58f692d3028ca1b64b145d91 diff --git a/golang/README.md b/golang/README.md index 764641c98..6f7859b93 100644 --- a/golang/README.md +++ b/golang/README.md @@ -8,7 +8,7 @@ Download and install the Debian package from the latest release. Substitute ``, ``, and `` with your settings. ```sh -wget https://github.com/unum-cloud/usearch/releases/download//usearch_linux__.deb +wget https://github.com/unum-cloud/USearch/releases/download//usearch_linux__.deb dpkg -i usearch_linux__.deb ``` @@ -27,7 +27,7 @@ Download and unpack the zip archive from the latest release. Move the USearch library and the include file to their respective folders. 
```sh -wget https://github.com/unum-cloud/usearch/releases/download//usearch_macos__.zip +wget https://github.com/unum-cloud/USearch/releases/download//usearch_macos__.zip unzip usearch_macos__.zip sudo mv libusearch_c.dylib /usr/local/lib && sudo mv usearch.h /usr/local/include ``` diff --git a/golang/go.mod b/golang/go.mod index a8cb11744..e936b5d5c 100644 --- a/golang/go.mod +++ b/golang/go.mod @@ -1,3 +1,3 @@ module github.com/unum-cloud/usearch/golang -go 1.19 +go 1.22 diff --git a/golang/lib.go b/golang/lib.go index db4879dda..41d4e561a 100644 --- a/golang/lib.go +++ b/golang/lib.go @@ -119,18 +119,29 @@ func (m Metric) CValue() C.usearch_metric_kind_t { // Different quantization types offer different trade-offs between memory usage and precision. type Quantization uint8 -// Different quantization kinds supported by the USearch library. +// Different quantization kinds supported by the USearch library, +// ordered by descending dynamic range. const ( + // F64 uses 64-bit double precision floating point + F64 Quantization = iota // F32 uses 32-bit floating point (standard precision) - F32 Quantization = iota + F32 // BF16 uses brain floating-point format (16-bit) BF16 // F16 uses half-precision floating point (16-bit) F16 - // F64 uses 64-bit double precision floating point - F64 + // E5M2 uses 8-bit floating point (1 sign + 5 exponent + 2 mantissa) + E5M2 + // E4M3 uses 8-bit floating point (1 sign + 4 exponent + 3 mantissa) + E4M3 + // E3M2 uses 6-bit floating point (1 sign + 3 exponent + 2 mantissa) + E3M2 + // E2M3 uses 6-bit floating point (1 sign + 2 exponent + 3 mantissa) + E2M3 // I8 uses 8-bit signed integers (quantized) I8 + // U8 uses 8-bit unsigned integers + U8 // B1 uses binary representation (1-bit per dimension) B1 ) @@ -138,16 +149,26 @@ const ( // String returns the string representation of the Quantization. 
func (q Quantization) String() string { switch q { + case F64: + return "F64" + case F32: + return "F32" case BF16: return "BF16" case F16: return "F16" - case F32: - return "F32" - case F64: - return "F64" + case E5M2: + return "E5M2" + case E4M3: + return "E4M3" + case E3M2: + return "E3M2" + case E2M3: + return "E2M3" case I8: return "I8" + case U8: + return "U8" case B1: return "B1" default: @@ -157,18 +178,28 @@ func (q Quantization) String() string { func (q Quantization) CValue() C.usearch_scalar_kind_t { switch q { - case F16: - return C.usearch_scalar_f16_k - case F32: - return C.usearch_scalar_f32_k case F64: return C.usearch_scalar_f64_k + case F32: + return C.usearch_scalar_f32_k + case BF16: + return C.usearch_scalar_bf16_k + case F16: + return C.usearch_scalar_f16_k + case E5M2: + return C.usearch_scalar_e5m2_k + case E4M3: + return C.usearch_scalar_e4m3_k + case E3M2: + return C.usearch_scalar_e3m2_k + case E2M3: + return C.usearch_scalar_e2m3_k case I8: return C.usearch_scalar_i8_k + case U8: + return C.usearch_scalar_u8_k case B1: return C.usearch_scalar_b1_k - case BF16: - return C.usearch_scalar_bf16_k default: return C.usearch_scalar_unknown_k } @@ -224,6 +255,21 @@ type Index struct { config IndexConfig } +// Version returns the USearch library version string. +func Version() string { + return C.GoString(C.usearch_version()) +} + +// HardwareAccelerationCompiled returns a comma-separated list of ISAs compiled into the binary. +func HardwareAccelerationCompiled() string { + return C.GoString(C.usearch_hardware_acceleration_compiled()) +} + +// HardwareAccelerationAvailable returns a comma-separated list of ISAs available at runtime. +func HardwareAccelerationAvailable() string { + return C.GoString(C.usearch_hardware_acceleration_available()) +} + // NewIndex creates a new approximate nearest neighbor index with the specified configuration. // // The index must be destroyed with Destroy() when no longer needed. 
@@ -271,11 +317,6 @@ func NewIndex(conf IndexConfig) (index *Index, err error) { return index, nil } -// Version returns the USearch library version string. -func Version() string { - return C.GoString(C.usearch_version()) -} - // GetHandle returns the C index handel. func (index *Index) GetHandle() C.usearch_index_t { return index.handle @@ -1015,6 +1056,123 @@ func (index *Index) FilteredSearchI8(query []int8, limit uint, handler *Filtered return keys, distances, nil } +// AddU8 adds a uint8 vector to the index. +// The vector must have exactly Dimensions() elements. +// +// This is a convenience method for indexes using U8 quantization. +func (index *Index) AddU8(key Key, vec []uint8) error { + if index.handle == nil { + panic("index is uninitialized") + } + if len(vec) == 0 { + return errors.New("vector cannot be empty") + } + if uint(len(vec)) != index.config.Dimensions { + return fmt.Errorf("vector dimension mismatch: got %d, expected %d", len(vec), index.config.Dimensions) + } + var errorMessage *C.char + C.usearch_add(index.handle, (C.usearch_key_t)(key), unsafe.Pointer(&vec[0]), C.usearch_scalar_u8_k, (*C.usearch_error_t)(&errorMessage)) + runtime.KeepAlive(vec) + if errorMessage != nil { + return errors.New(C.GoString(errorMessage)) + } + return nil +} + +// SearchU8 searches for nearest neighbors using a uint8 query vector. +// The query must have exactly Dimensions() elements. +// +// This is a convenience method for indexes using U8 quantization. 
+func (index *Index) SearchU8(query []uint8, limit uint) (keys []Key, distances []float32, err error) { + if index.handle == nil { + panic("index is uninitialized") + } + if len(query) == 0 { + return nil, nil, errors.New("query vector cannot be empty") + } + if uint(len(query)) != index.config.Dimensions { + return nil, nil, fmt.Errorf("query dimension mismatch: got %d, expected %d", len(query), index.config.Dimensions) + } + if limit == 0 { + return []Key{}, []float32{}, nil + } + keys = make([]Key, limit) + distances = make([]float32, limit) + var errorMessage *C.char + resultCount := uint(C.usearch_search(index.handle, unsafe.Pointer(&query[0]), C.usearch_scalar_u8_k, (C.size_t)(limit), (*C.usearch_key_t)(&keys[0]), (*C.usearch_distance_t)(&distances[0]), (*C.usearch_error_t)(&errorMessage))) + runtime.KeepAlive(query) + runtime.KeepAlive(keys) + runtime.KeepAlive(distances) + if errorMessage != nil { + return nil, nil, errors.New(C.GoString(errorMessage)) + } + keys = keys[:resultCount] + distances = distances[:resultCount] + return keys, distances, nil +} + +// FilteredSearchU8 searches for nearest neighbors using a uint8 query vector with filtering. 
+func (index *Index) FilteredSearchU8(query []uint8, limit uint, handler *FilteredSearchHandler) (keys []Key, distances []float32, err error) { + if index.handle == nil { + panic("index is uninitialized") + } + + if len(query) == 0 { + return nil, nil, errors.New("query vector cannot be empty") + } + if uint(len(query)) != index.config.Dimensions { + return nil, nil, fmt.Errorf("query dimension mismatch: got %d, expected %d", len(query), index.config.Dimensions) + } + if handler == nil { + return nil, nil, errors.New("filtered search handler cannot be nil") + } + if limit == 0 { + return []Key{}, []float32{}, nil + } + + keys = make([]Key, limit) + distances = make([]float32, limit) + var errorMessage *C.char + resultCount := uint(C.usearch_filtered_search(index.handle, unsafe.Pointer(&query[0]), C.usearch_scalar_u8_k, (C.size_t)(limit), + (C.usearch_filtered_search_callback_t)(C.goFilteredSearchCallback), unsafe.Pointer(handler), + (*C.usearch_key_t)(&keys[0]), (*C.usearch_distance_t)(&distances[0]), (*C.usearch_error_t)(&errorMessage))) + runtime.KeepAlive(query) + runtime.KeepAlive(keys) + runtime.KeepAlive(distances) + runtime.KeepAlive(handler) + if errorMessage != nil { + return nil, nil, errors.New(C.GoString(errorMessage)) + } + + keys = keys[:resultCount] + distances = distances[:resultCount] + return keys, distances, nil +} + +// GetU8 retrieves a uint8 vector by key from the index. +// Returns nil if the key is not found. 
+func (index *Index) GetU8(key Key, maxCount uint) (vectors []uint8, err error) { + if index.handle == nil { + panic("index is uninitialized") + } + + if maxCount == 0 { + return nil, nil + } + + vectors = make([]uint8, index.config.Dimensions*maxCount) + var errorMessage *C.char + found := uint(C.usearch_get(index.handle, (C.usearch_key_t)(key), (C.size_t)(maxCount), unsafe.Pointer(&vectors[0]), C.usearch_scalar_u8_k, (*C.usearch_error_t)(&errorMessage))) + runtime.KeepAlive(vectors) + if errorMessage != nil { + return nil, errors.New(C.GoString(errorMessage)) + } + if found == 0 { + return nil, nil + } + return vectors, nil +} + // DistanceI8 computes the distance between two int8 vectors. // // Example: diff --git a/golang/lib_test.go b/golang/lib_test.go index 4f9b72439..12eff69c5 100644 --- a/golang/lib_test.go +++ b/golang/lib_test.go @@ -45,6 +45,14 @@ func generateTestVectorI8(dimensions uint) []int8 { return vector } +func generateTestVectorU8(dimensions uint) []uint8 { + vector := make([]uint8, dimensions) + for i := uint(0); i < dimensions; i++ { + vector[i] = uint8((i % 255) + 1) + } + return vector +} + func populateIndex(t *testing.T, index *Index, vectorCount int) [][]float32 { vectors := make([][]float32, vectorCount) err := index.Reserve(uint(vectorCount)) @@ -657,6 +665,65 @@ func TestQuantizationTypes(t *testing.T) { t.Fatalf("FilteredSearchI8 returned incorrect results") } }) + + t.Run("U8 operations", func(t *testing.T) { + index := createTestIndex(t, 32, U8) + defer func() { + if err := index.Destroy(); err != nil { + t.Errorf("Failed to destroy index: %v", err) + } + }() + + if err := index.Reserve(1); err != nil { + t.Fatalf("Failed to reserve capacity: %v", err) + } + vector := generateTestVectorU8(32) + err := index.AddU8(1, vector) + if err != nil { + t.Fatalf("U8 Add failed: %v", err) + } + + keys, _, err := index.SearchU8(vector, 1) + if err != nil { + t.Fatalf("U8 Search failed: %v", err) + } + + if len(keys) == 0 || keys[0] != 1 { + 
t.Fatalf("U8 search results incorrect") + } + + // Test FilteredSearchU8 + handler := &FilteredSearchHandler{ + Callback: func(key Key, handler *FilteredSearchHandler) int { + if key%2 == 0 { + return 1 + } + return 0 + }, + Data: int64(1), + } + + keys, _, err = index.FilteredSearchU8(vector, 1, handler) + if err != nil { + t.Fatalf("FilteredSearchU8 failed: %v", err) + } + + if len(keys) > 0 { + t.Fatalf("FilteredSearchU8 returned incorrect results") + } + + // Test GetU8 + retrieved, err := index.GetU8(1, 1) + if err != nil { + t.Fatalf("U8 Get failed: %v", err) + } + if retrieved == nil { + t.Fatalf("U8 Get returned nil") + } + if len(retrieved) != 32 { + t.Fatalf("U8 Get returned wrong dimensions: got %d, expected 32", len(retrieved)) + } + }) } func TestUnsafeOperations(t *testing.T) { diff --git a/include/usearch/index.hpp b/include/usearch/index.hpp index 21a212d99..24ddbaac3 100644 --- a/include/usearch/index.hpp +++ b/include/usearch/index.hpp @@ -597,6 +597,125 @@ template > class bitset_gt { using bitset_t = bitset_gt<>; +/** + * @brief Cache-line-padded striped spin-lock array for concurrent graph mutations. + * Maps node slots to lock stripes via Fibonacci hashing, with each stripe + * occupying its own cache line to eliminate false sharing. + * The number of stripes is proportional to `threads * connectivity`, not + * graph size, keeping the lock array comfortably within L2/L3 cache. 
+ */ +template , std::size_t cache_line_ak = 128> // +class striped_locks_gt { + using allocator_t = allocator_at; + using byte_t = typename allocator_t::value_type; + static_assert(sizeof(byte_t) == 1, "Allocator must allocate separate addressable bytes"); + + static constexpr std::uint64_t fibonacci_k = 0x9E3779B97F4A7C15ull; + + using atomic_flag_t = std::atomic; + struct alignas(cache_line_ak) padded_lock_t { + atomic_flag_t flag{0}; + char padding_[cache_line_ak - sizeof(atomic_flag_t)]; + }; + static_assert(sizeof(padded_lock_t) == cache_line_ak, "Lock stripe must be exactly one cache line"); + + // `padded_lock_t` is `alignas(cache_line_ak)` (128 B by default) which + // exceeds what a plain allocator guarantees (typically 16 B on x86-64). + // Rather than demanding an over-aligned allocator, we over-allocate and + // keep a pointer to the aligned sub-region โ€” `raw_` is what we hand back + // to the allocator, `stripes_` is the aligned view used for reads/writes. + byte_t* raw_{}; + std::size_t raw_bytes_{}; + padded_lock_t* stripes_{}; + std::size_t count_{}; + unsigned shift_{}; + + inline std::size_t stripe_for_(std::size_t slot) const noexcept { + return static_cast((static_cast(slot) * fibonacci_k) >> shift_); + } + + public: + striped_locks_gt() noexcept {} + ~striped_locks_gt() noexcept { reset(); } + + explicit operator bool() const noexcept { return stripes_; } + + void reset() noexcept { + if (stripes_) + for (std::size_t i = 0; i < count_; i++) + stripes_[i].~padded_lock_t(); + if (raw_) + allocator_t{}.deallocate(raw_, raw_bytes_); + raw_ = nullptr; + raw_bytes_ = 0; + stripes_ = nullptr; + count_ = 0; + shift_ = 64; + } + + striped_locks_gt(std::size_t threads, std::size_t connectivity) noexcept { + std::size_t desired = threads * connectivity * 4; + if (desired < 256) + desired = 256; + count_ = ceil2(desired); + shift_ = 64; + for (std::size_t n = count_; n > 1; n >>= 1) + shift_--; + // Request one extra stripe's worth of slack so we can 
always land on a + // `cache_line_ak`-aligned address inside the allocation, regardless of + // what the underlying allocator returns. + constexpr std::size_t alignment_k = alignof(padded_lock_t); + raw_bytes_ = count_ * sizeof(padded_lock_t) + alignment_k; + raw_ = allocator_t{}.allocate(raw_bytes_); + if (!raw_) { + raw_bytes_ = 0; + count_ = 0; + shift_ = 64; + return; + } + auto raw_address = reinterpret_cast(raw_); + auto aligned_address = (raw_address + alignment_k - 1) & ~(static_cast(alignment_k) - 1); + stripes_ = reinterpret_cast(aligned_address); + for (std::size_t i = 0; i < count_; i++) + new (&stripes_[i]) padded_lock_t(); + } + + striped_locks_gt(striped_locks_gt&& other) noexcept { + raw_ = exchange(other.raw_, (byte_t*)nullptr); + raw_bytes_ = exchange(other.raw_bytes_, std::size_t{0}); + stripes_ = exchange(other.stripes_, nullptr); + count_ = exchange(other.count_, std::size_t{0}); + shift_ = exchange(other.shift_, unsigned{64}); + } + + striped_locks_gt& operator=(striped_locks_gt&& other) noexcept { + std::swap(raw_, other.raw_); + std::swap(raw_bytes_, other.raw_bytes_); + std::swap(stripes_, other.stripes_); + std::swap(count_, other.count_); + std::swap(shift_, other.shift_); + return *this; + } + + striped_locks_gt(striped_locks_gt const&) = delete; + striped_locks_gt& operator=(striped_locks_gt const&) = delete; + + inline bool atomic_set(std::size_t i) noexcept { + return stripes_[stripe_for_(i)].flag.exchange(1, std::memory_order_acquire); + } + + inline void atomic_reset(std::size_t i) noexcept { + stripes_[stripe_for_(i)].flag.store(0, std::memory_order_release); + } + + inline void lock(std::size_t i) noexcept { + while (atomic_set(i)) + std::this_thread::yield(); + } + + inline void unlock(std::size_t i) noexcept { atomic_reset(i); } +}; + /** * @brief Similar to `std::priority_queue`, but allows raw access to underlying * memory, in case you want to shuffle it or sort. 
Good for collections @@ -728,10 +847,12 @@ class max_heap_gt { */ usearch_profiled_m bool reserve(std::size_t new_capacity) noexcept { usearch_profile_name_m(max_heap_reserve); - if (new_capacity < capacity_) + if (new_capacity <= capacity_) return true; new_capacity = ceil2(new_capacity); + if (new_capacity == 0) + return false; new_capacity = (std::max)(new_capacity, (std::max)(capacity_ * 2u, 16u)); auto allocator = allocator_t{}; auto new_elements = allocator.allocate(new_capacity); @@ -894,10 +1015,12 @@ class sorted_buffer_gt { inline void clear() noexcept { size_ = 0; } bool reserve(std::size_t new_capacity) noexcept { - if (new_capacity < capacity_) + if (new_capacity <= capacity_) return true; new_capacity = ceil2(new_capacity); + if (new_capacity == 0) + return false; new_capacity = (std::max)(new_capacity, (std::max)(capacity_ * 2u, 16u)); auto allocator = allocator_t{}; auto new_elements = allocator.allocate(new_capacity); @@ -1013,8 +1136,10 @@ class usearch_pack_m uint40_t { return result; } - inline static uint40_t max() noexcept { return uint40_t{}.broadcast(0xFF); } - inline static uint40_t min() noexcept { return uint40_t{}.broadcast(0); } + /* Parenthesized declarator keeps MSVC's preprocessor from expanding + * `max` / `min` against ``'s `max(a,b)` / `min(a,b)` macros. 
*/ + inline static uint40_t(max)() noexcept { return uint40_t{}.broadcast(0xFF); } + inline static uint40_t(min)() noexcept { return uint40_t{}.broadcast(0); } inline bool operator==(uint40_t const& other) const noexcept { return std::memcmp(octets, other.octets, 5) == 0; } inline bool operator!=(uint40_t const& other) const noexcept { return !(*this == other); } @@ -1046,7 +1171,7 @@ template struct default_free_value_gt { template ::value>::type* = nullptr> static sfinae_element_at value() noexcept { - return std::numeric_limits::max(); + return (std::numeric_limits::max)(); } template ::value>::type* = nullptr> @@ -1056,7 +1181,7 @@ template struct default_free_value_gt { }; template <> struct default_free_value_gt { - static uint40_t value() noexcept { return uint40_t::max(); } + static uint40_t value() noexcept { return (uint40_t::max)(); } }; template element_at default_free_value() { return default_free_value_gt::value(); } @@ -2031,8 +2156,8 @@ class index_gt { friend inline vector_key_t get_key(member_iterator_gt const& it) noexcept { return it.key(); } // clang-format off - member_iterator_gt operator++(int) noexcept { return member_iterator_gt(index_, static_cast(static_cast(slot_) + 1)); } - member_iterator_gt operator--(int) noexcept { return member_iterator_gt(index_, static_cast(static_cast(slot_) - 1)); } + member_iterator_gt operator++(int) noexcept { member_iterator_gt old(index_, slot_); ++(*this); return old; } + member_iterator_gt operator--(int) noexcept { member_iterator_gt old(index_, slot_); --(*this); return old; } member_iterator_gt operator+(difference_type d) noexcept { return member_iterator_gt(index_, static_cast(static_cast(slot_) + d)); } member_iterator_gt operator-(difference_type d) noexcept { return member_iterator_gt(index_, static_cast(static_cast(slot_) - d)); } member_iterator_gt& operator++() noexcept { slot_ = static_cast(static_cast(slot_) + 1); return *this; } @@ -2086,7 +2211,7 @@ class index_gt { */ static constexpr 
std::size_t node_head_bytes_() { return sizeof(vector_key_t) + sizeof(level_t); } - using nodes_mutexes_t = bitset_gt; + using nodes_mutexes_t = striped_locks_gt; using visits_hash_set_t = growing_hash_set_gt, dynamic_allocator_t>; @@ -2191,7 +2316,7 @@ class index_gt { misaligned_store(tape_ + shift(i - removed_count), slot); } } - misaligned_store(tape_, old_count - removed_count); + misaligned_store(tape_, static_cast(old_count - removed_count)); return removed_count; } }; @@ -2509,7 +2634,8 @@ class index_gt { return true; } - nodes_mutexes_t new_mutexes(limits.members); + std::size_t connectivity_max = (std::max)(config_.connectivity_base, config_.connectivity); + nodes_mutexes_t new_mutexes(limits.threads(), connectivity_max); buffer_gt new_nodes(limits.members); buffer_gt new_contexts(limits.threads()); if (!new_nodes || !new_contexts || !new_mutexes) @@ -2518,10 +2644,6 @@ class index_gt { // Move the nodes info, and deallocate previous buffers. if (nodes_) std::memcpy(new_nodes.data(), nodes_.data(), sizeof(node_t) * size()); - - // Pre-reserve the capacity for `top_for_refine`, which always contains at most one more - // element than the connectivity factors. - std::size_t connectivity_max = (std::max)(config_.connectivity_base, config_.connectivity); for (std::size_t i = 0; i != new_contexts.size(); ++i) if (!new_contexts[i].top_for_refine.reserve(connectivity_max + 1)) return false; @@ -2569,7 +2691,7 @@ class index_gt { member_cref_t member; distance_t distance; - inline match_t() noexcept : member({nullptr, 0}), distance(std::numeric_limits::max()) {} + inline match_t() noexcept : member({nullptr, 0}), distance((std::numeric_limits::max)()) {} inline match_t(member_cref_t member, distance_t distance) noexcept : member(member), distance(distance) {} @@ -2721,7 +2843,7 @@ class index_gt { keys[i] = vector_key_t{}; distances[i] = std::numeric_limits::has_signaling_NaN ? 
std::numeric_limits::signaling_NaN() - : std::numeric_limits::max(); + : (std::numeric_limits::max)(); } return initialized_count; } @@ -2788,6 +2910,10 @@ class index_gt { callback_at&& callback = callback_at{}, // prefetch_at&& prefetch = prefetch_at{}) usearch_noexcept_m { + // Zero expansion is meaningless, fall back to default + if (!config.expansion) + config.expansion = default_expansion_add(); + add_result_t result; if (is_immutable()) return result.failed("Can't add to an immutable index"); @@ -3103,6 +3229,9 @@ class index_gt { predicate_at&& predicate = predicate_at{}, // prefetch_at&& prefetch = prefetch_at{}) const noexcept { + if (!config.expansion) + config.expansion = default_expansion_search(); + context_t& context = contexts_[config.thread]; cluster_result_t result; if (!nodes_count_) @@ -3181,7 +3310,7 @@ class index_gt { continue; ++result.nodes; - result.edges += neighbors_(node, level).size(); + result.edges += neighbors_(node, static_cast(level)).size(); result.allocated_bytes += node_head_bytes_() + neighbors_bytes; } @@ -3803,12 +3932,11 @@ class index_gt { struct node_lock_t { nodes_mutexes_t& mutexes; std::size_t slot; - inline ~node_lock_t() noexcept { mutexes.atomic_reset(slot); } + inline ~node_lock_t() noexcept { mutexes.unlock(slot); } }; inline node_lock_t node_lock_(std::size_t slot) const noexcept { - while (nodes_mutexes_.atomic_set(slot)) - ; + nodes_mutexes_.lock(slot); return {nodes_mutexes_, slot}; } @@ -3816,18 +3944,17 @@ class index_gt { nodes_mutexes_t& mutexes; std::size_t slot; inline ~optional_node_lock_t() noexcept { - if (slot != std::numeric_limits::max()) - mutexes.atomic_reset(slot); + if (slot != (std::numeric_limits::max)()) + mutexes.unlock(slot); } }; inline optional_node_lock_t optional_node_lock_(std::size_t slot, bool condition) const noexcept { if (condition) { - while (nodes_mutexes_.atomic_set(slot)) - ; + nodes_mutexes_.lock(slot); return {nodes_mutexes_, slot}; } else { - return {nodes_mutexes_, 
std::numeric_limits::max()}; + return {nodes_mutexes_, (std::numeric_limits::max)()}; } } @@ -3835,8 +3962,8 @@ class index_gt { nodes_mutexes_t& mutexes; std::size_t slot; inline ~node_conditional_lock_t() noexcept { - if (slot != std::numeric_limits::max()) - mutexes.atomic_reset(slot); + if (slot != (std::numeric_limits::max)()) + mutexes.unlock(slot); } }; @@ -3844,10 +3971,10 @@ class index_gt { bool& failed_to_acquire) const noexcept { if (!condition) { failed_to_acquire = false; - return {nodes_mutexes_, std::numeric_limits::max()}; + return {nodes_mutexes_, (std::numeric_limits::max)()}; } failed_to_acquire = nodes_mutexes_.atomic_set(slot); - return {nodes_mutexes_, failed_to_acquire ? std::numeric_limits::max() : slot}; + return {nodes_mutexes_, failed_to_acquire ? (std::numeric_limits::max)() : slot}; } template @@ -3898,8 +4025,12 @@ class index_gt { usearch_assert_m(close_slot != new_slot, "Self-loops are impossible"); usearch_assert_m(level <= close_node.level(), "Linking to missing level"); - // If `new_slot` is already present in the neighboring connections of `close_slot` - // then no need to modify any connections or run the heuristics. + // Skip to prevent duplicate entries in the neighbor list. 
+ if (std::find_if(close_header.begin(), close_header.end(), + [new_slot](compressed_slot_t slot) { return slot == new_slot; }) != close_header.end()) { + continue; + } + if (close_header.size() < connectivity_max) { close_header.push_back(new_slot); continue; @@ -3914,7 +4045,7 @@ class index_gt { // Export the results: close_header.clear(); candidates_view_t top_view = refine_(metric, connectivity_max, top_for_refine, context, - context.computed_distances_in_reverse_refines); + context.computed_distances_in_reverse_refines, new_slot, value); usearch_assert_m(top_view.size(), "This would lead to isolated nodes"); for (std::size_t idx = 0; idx != top_view.size(); idx++) close_header.push_back(top_view[idx].slot); @@ -3962,7 +4093,9 @@ class index_gt { std::size_t progress) noexcept : index_(index), neighbors_(neighbors), visits_(visits), current_(progress) {} candidates_iterator_t operator++(int) noexcept { - return candidates_iterator_t(index_, neighbors_, visits_, current_ + 1).skip_missing(); + candidates_iterator_t old(index_, neighbors_, visits_, current_); + ++(*this); + return old; } candidates_iterator_t& operator++() noexcept { ++current_; @@ -4053,6 +4186,10 @@ class index_gt { // At the very least we are going to explore the starting node and its neighbors if (!visits.reserve(config_.connectivity_base + 1u)) return false; + if (!top.reserve(top_limit)) + return false; + if (!next.reserve(top_limit)) + return false; // Optional prefetching if (!is_dummy()) @@ -4130,6 +4267,10 @@ class index_gt { // At the very least we are going to explore the starting node and its neighbors if (!visits.reserve(config_.connectivity_base + 1u)) return false; + if (!top.reserve(top_limit)) + return false; + if (!next.reserve(top_limit)) + return false; // Optional prefetching if (!is_dummy()) @@ -4298,16 +4439,50 @@ class index_gt { } } + /// @brief Helper for `refine_()`: computes inter-neighbor distance, substituting + /// @p override_value when either slot matches @p 
override_slot. + /// The `std::nullptr_t` overload below avoids instantiating the override + /// branch when no override is provided, keeping the code C++11 compatible. + template + distance_t inter_neighbor_distance_( // + candidate_t const& candidate, candidate_t const& submitted, // + compressed_slot_t override_slot, override_value_at override_value, // + metric_at&& metric, context_t& context) const noexcept { + if (candidate.slot == override_slot) + return context.measure(override_value, citerator_at(submitted.slot), metric); + else if (submitted.slot == override_slot) + return context.measure(override_value, citerator_at(candidate.slot), metric); + else + return context.measure(citerator_at(candidate.slot), citerator_at(submitted.slot), metric); + } + + template + distance_t inter_neighbor_distance_( // + candidate_t const& candidate, candidate_t const& submitted, // + compressed_slot_t, std::nullptr_t, // + metric_at&& metric, context_t& context) const noexcept { + return context.measure(citerator_at(candidate.slot), citerator_at(submitted.slot), metric); + } + /** * @brief This algorithm from the original paper implements a heuristic, * that massively reduces the number of connections a point has, * to keep only the neighbors, that are from each other. + * + * @param[in] override_slot Optional slot whose stored vector is stale (e.g. during update, + * where the callback has not yet committed the new vector). + * When set, inter-result distances involving this slot will use + * @p override_value instead of reading from `citerator_at()`. + * @param[in] override_value The up-to-date vector for @p override_slot. Only used when + * @p override_value_at is not `std::nullptr_t`. 
*/ - template + template candidates_view_t refine_( // metric_at&& metric, // std::size_t needed, top_candidates_t& top, context_t& context, // - std::size_t& refines_counter) const noexcept { + std::size_t& refines_counter, // + compressed_slot_t override_slot = ((std::numeric_limits::max))(), + override_value_at override_value = {}) const noexcept { // Avoid expensive computation, if the set is already small candidate_t* top_data = top.data(); @@ -4326,10 +4501,8 @@ class index_gt { std::size_t idx = 0; for (; idx < submitted_count; idx++) { candidate_t submitted = top_data[idx]; - distance_t inter_result_dist = context.measure( // - citerator_at(candidate.slot), // - citerator_at(submitted.slot), // - metric); + distance_t inter_result_dist = inter_neighbor_distance_( // + candidate, submitted, override_slot, override_value, metric, context); if (inter_result_dist < candidate.distance) { good = false; break; @@ -4420,7 +4593,7 @@ static join_result_t join( // return result.failed("Can't join with itself, consider copying"); if (config.max_proposals == 0) - config.max_proposals = std::log(men.size()) + executor.size(); + config.max_proposals = static_cast(std::log(men.size())) + executor.size(); using proposals_count_t = std::uint16_t; config.max_proposals = (std::min)(men.size(), config.max_proposals); diff --git a/include/usearch/index_dense.hpp b/include/usearch/index_dense.hpp index 3094494d7..bf23f8424 100644 --- a/include/usearch/index_dense.hpp +++ b/include/usearch/index_dense.hpp @@ -220,7 +220,7 @@ inline scalar_kind_t convert_pre_2_10_scalar_kind(scalar_kind_t scalar_kind) noe case 4: return scalar_kind_t::f64_k; case 5: return scalar_kind_t::f32_k; case 6: return scalar_kind_t::f16_k; - case 7: return scalar_kind_t::f8_k; + case 7: return scalar_kind_t::e5m2_k; case 8: return scalar_kind_t::u64_k; case 9: return scalar_kind_t::u32_k; case 10: return scalar_kind_t::u8_k; @@ -234,7 +234,7 @@ inline scalar_kind_t 
convert_pre_2_10_scalar_kind(scalar_kind_t scalar_kind) noe /** * @brief Fixes the metadata for pre-v2.10 versions, until we can upgrade to v3. - * Originates from: https://github.com/unum-cloud/usearch/issues/423 + * Originates from: https://github.com/unum-cloud/USearch/issues/423 */ inline void fix_pre_2_10_metadata(index_dense_head_t& head) { if (head.version_major == 2 && head.version_minor < 10) { @@ -746,8 +746,36 @@ class index_dense_gt { vectors_tape_allocator_.total_allocated(); } - static constexpr std::size_t any_thread() { return std::numeric_limits::max(); } - static constexpr distance_t infinite_distance() { return std::numeric_limits::max(); } + /** + * @brief Aggregated memory statistics for the allocator tapes used by the dense index. + */ + struct memory_stats_t { + /// Memory stats for the graph structure allocator. + std::size_t graph_allocated; + std::size_t graph_wasted; + std::size_t graph_reserved; + /// Memory stats for the vectors data allocator. + std::size_t vectors_allocated; + std::size_t vectors_wasted; + std::size_t vectors_reserved; + }; + + /** + * @brief Returns detailed memory statistics with separate breakdowns for the graph + * and vectors allocator tapes. + * @return A `memory_stats_t` struct with per-tape allocated, wasted, and reserved bytes. 
+ */ + memory_stats_t memory_stats() const { + auto const& graph_alloc = typed_->tape_allocator(); + return { + graph_alloc.total_allocated(), graph_alloc.total_wasted(), + graph_alloc.total_reserved(), vectors_tape_allocator_.total_allocated(), + vectors_tape_allocator_.total_wasted(), vectors_tape_allocator_.total_reserved(), + }; + } + + static constexpr std::size_t any_thread() { return (std::numeric_limits::max)(); } + static constexpr distance_t infinite_distance() { return (std::numeric_limits::max)(); } struct aggregated_distances_t { std::size_t count = 0; @@ -757,47 +785,77 @@ class index_dense_gt { }; // clang-format off - add_result_t add(vector_key_t key, b1x8_t const* vector, std::size_t thread = any_thread(), bool copy_vector = true) { return add_(key, vector, thread, copy_vector, casts_.from.b1x8); } - add_result_t add(vector_key_t key, i8_t const* vector, std::size_t thread = any_thread(), bool copy_vector = true) { return add_(key, vector, thread, copy_vector, casts_.from.i8); } - add_result_t add(vector_key_t key, f16_t const* vector, std::size_t thread = any_thread(), bool copy_vector = true) { return add_(key, vector, thread, copy_vector, casts_.from.f16); } - add_result_t add(vector_key_t key, bf16_t const* vector, std::size_t thread = any_thread(), bool copy_vector = true) { return add_(key, vector, thread, copy_vector, casts_.from.bf16); } - add_result_t add(vector_key_t key, f32_t const* vector, std::size_t thread = any_thread(), bool copy_vector = true) { return add_(key, vector, thread, copy_vector, casts_.from.f32); } add_result_t add(vector_key_t key, f64_t const* vector, std::size_t thread = any_thread(), bool copy_vector = true) { return add_(key, vector, thread, copy_vector, casts_.from.f64); } + add_result_t add(vector_key_t key, f32_t const* vector, std::size_t thread = any_thread(), bool copy_vector = true) { return add_(key, vector, thread, copy_vector, casts_.from.f32); } + add_result_t add(vector_key_t key, bf16_t const* 
vector, std::size_t thread = any_thread(), bool copy_vector = true) { return add_(key, vector, thread, copy_vector, casts_.from.bf16); } + add_result_t add(vector_key_t key, f16_t const* vector, std::size_t thread = any_thread(), bool copy_vector = true) { return add_(key, vector, thread, copy_vector, casts_.from.f16); } + add_result_t add(vector_key_t key, e5m2_t const* vector, std::size_t thread = any_thread(), bool copy_vector = true) { return add_(key, vector, thread, copy_vector, casts_.from.e5m2); } + add_result_t add(vector_key_t key, e4m3_t const* vector, std::size_t thread = any_thread(), bool copy_vector = true) { return add_(key, vector, thread, copy_vector, casts_.from.e4m3); } + add_result_t add(vector_key_t key, e3m2_t const* vector, std::size_t thread = any_thread(), bool copy_vector = true) { return add_(key, vector, thread, copy_vector, casts_.from.e3m2); } + add_result_t add(vector_key_t key, e2m3_t const* vector, std::size_t thread = any_thread(), bool copy_vector = true) { return add_(key, vector, thread, copy_vector, casts_.from.e2m3); } + add_result_t add(vector_key_t key, i8_t const* vector, std::size_t thread = any_thread(), bool copy_vector = true) { return add_(key, vector, thread, copy_vector, casts_.from.i8); } + add_result_t add(vector_key_t key, u8_t const* vector, std::size_t thread = any_thread(), bool copy_vector = true) { return add_(key, vector, thread, copy_vector, casts_.from.u8); } + add_result_t add(vector_key_t key, b1x8_t const* vector, std::size_t thread = any_thread(), bool copy_vector = true) { return add_(key, vector, thread, copy_vector, casts_.from.b1x8); } - search_result_t search(b1x8_t const* vector, std::size_t wanted, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, dummy_predicate_t {}, thread, exact, casts_.from.b1x8); } - search_result_t search(i8_t const* vector, std::size_t wanted, std::size_t thread = any_thread(), bool exact = false) const { return 
search_(vector, wanted, dummy_predicate_t {}, thread, exact, casts_.from.i8); } - search_result_t search(f16_t const* vector, std::size_t wanted, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, dummy_predicate_t {}, thread, exact, casts_.from.f16); } - search_result_t search(bf16_t const* vector, std::size_t wanted, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, dummy_predicate_t {}, thread, exact, casts_.from.bf16); } - search_result_t search(f32_t const* vector, std::size_t wanted, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, dummy_predicate_t {}, thread, exact, casts_.from.f32); } search_result_t search(f64_t const* vector, std::size_t wanted, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, dummy_predicate_t {}, thread, exact, casts_.from.f64); } + search_result_t search(f32_t const* vector, std::size_t wanted, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, dummy_predicate_t {}, thread, exact, casts_.from.f32); } + search_result_t search(bf16_t const* vector, std::size_t wanted, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, dummy_predicate_t {}, thread, exact, casts_.from.bf16); } + search_result_t search(f16_t const* vector, std::size_t wanted, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, dummy_predicate_t {}, thread, exact, casts_.from.f16); } + search_result_t search(e5m2_t const* vector, std::size_t wanted, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, dummy_predicate_t {}, thread, exact, casts_.from.e5m2); } + search_result_t search(e4m3_t const* vector, std::size_t wanted, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, dummy_predicate_t 
{}, thread, exact, casts_.from.e4m3); } + search_result_t search(e3m2_t const* vector, std::size_t wanted, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, dummy_predicate_t {}, thread, exact, casts_.from.e3m2); } + search_result_t search(e2m3_t const* vector, std::size_t wanted, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, dummy_predicate_t {}, thread, exact, casts_.from.e2m3); } + search_result_t search(i8_t const* vector, std::size_t wanted, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, dummy_predicate_t {}, thread, exact, casts_.from.i8); } + search_result_t search(u8_t const* vector, std::size_t wanted, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, dummy_predicate_t {}, thread, exact, casts_.from.u8); } + search_result_t search(b1x8_t const* vector, std::size_t wanted, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, dummy_predicate_t {}, thread, exact, casts_.from.b1x8); } - template search_result_t filtered_search(b1x8_t const* vector, std::size_t wanted, predicate_at&& predicate, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, std::forward(predicate), thread, exact, casts_.from.b1x8); } - template search_result_t filtered_search(i8_t const* vector, std::size_t wanted, predicate_at&& predicate, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, std::forward(predicate), thread, exact, casts_.from.i8); } - template search_result_t filtered_search(f16_t const* vector, std::size_t wanted, predicate_at&& predicate, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, std::forward(predicate), thread, exact, casts_.from.f16); } - template search_result_t filtered_search(bf16_t const* vector, std::size_t 
wanted, predicate_at&& predicate, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, std::forward(predicate), thread, exact, casts_.from.bf16); } - template search_result_t filtered_search(f32_t const* vector, std::size_t wanted, predicate_at&& predicate, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, std::forward(predicate), thread, exact, casts_.from.f32); } template search_result_t filtered_search(f64_t const* vector, std::size_t wanted, predicate_at&& predicate, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, std::forward(predicate), thread, exact, casts_.from.f64); } + template search_result_t filtered_search(f32_t const* vector, std::size_t wanted, predicate_at&& predicate, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, std::forward(predicate), thread, exact, casts_.from.f32); } + template search_result_t filtered_search(bf16_t const* vector, std::size_t wanted, predicate_at&& predicate, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, std::forward(predicate), thread, exact, casts_.from.bf16); } + template search_result_t filtered_search(f16_t const* vector, std::size_t wanted, predicate_at&& predicate, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, std::forward(predicate), thread, exact, casts_.from.f16); } + template search_result_t filtered_search(e5m2_t const* vector, std::size_t wanted, predicate_at&& predicate, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, std::forward(predicate), thread, exact, casts_.from.e5m2); } + template search_result_t filtered_search(e4m3_t const* vector, std::size_t wanted, predicate_at&& predicate, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, 
std::forward(predicate), thread, exact, casts_.from.e4m3); } + template search_result_t filtered_search(e3m2_t const* vector, std::size_t wanted, predicate_at&& predicate, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, std::forward(predicate), thread, exact, casts_.from.e3m2); } + template search_result_t filtered_search(e2m3_t const* vector, std::size_t wanted, predicate_at&& predicate, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, std::forward(predicate), thread, exact, casts_.from.e2m3); } + template search_result_t filtered_search(i8_t const* vector, std::size_t wanted, predicate_at&& predicate, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, std::forward(predicate), thread, exact, casts_.from.i8); } + template search_result_t filtered_search(u8_t const* vector, std::size_t wanted, predicate_at&& predicate, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, std::forward(predicate), thread, exact, casts_.from.u8); } + template search_result_t filtered_search(b1x8_t const* vector, std::size_t wanted, predicate_at&& predicate, std::size_t thread = any_thread(), bool exact = false) const { return search_(vector, wanted, std::forward(predicate), thread, exact, casts_.from.b1x8); } - std::size_t get(vector_key_t key, b1x8_t* vector, std::size_t vectors_count = 1) const { return get_(key, vector, vectors_count, casts_.to.b1x8); } - std::size_t get(vector_key_t key, i8_t* vector, std::size_t vectors_count = 1) const { return get_(key, vector, vectors_count, casts_.to.i8); } - std::size_t get(vector_key_t key, f16_t* vector, std::size_t vectors_count = 1) const { return get_(key, vector, vectors_count, casts_.to.f16); } - std::size_t get(vector_key_t key, bf16_t* vector, std::size_t vectors_count = 1) const { return get_(key, vector, vectors_count, casts_.to.bf16); } - std::size_t 
get(vector_key_t key, f32_t* vector, std::size_t vectors_count = 1) const { return get_(key, vector, vectors_count, casts_.to.f32); } std::size_t get(vector_key_t key, f64_t* vector, std::size_t vectors_count = 1) const { return get_(key, vector, vectors_count, casts_.to.f64); } + std::size_t get(vector_key_t key, f32_t* vector, std::size_t vectors_count = 1) const { return get_(key, vector, vectors_count, casts_.to.f32); } + std::size_t get(vector_key_t key, bf16_t* vector, std::size_t vectors_count = 1) const { return get_(key, vector, vectors_count, casts_.to.bf16); } + std::size_t get(vector_key_t key, f16_t* vector, std::size_t vectors_count = 1) const { return get_(key, vector, vectors_count, casts_.to.f16); } + std::size_t get(vector_key_t key, e5m2_t* vector, std::size_t vectors_count = 1) const { return get_(key, vector, vectors_count, casts_.to.e5m2); } + std::size_t get(vector_key_t key, e4m3_t* vector, std::size_t vectors_count = 1) const { return get_(key, vector, vectors_count, casts_.to.e4m3); } + std::size_t get(vector_key_t key, e3m2_t* vector, std::size_t vectors_count = 1) const { return get_(key, vector, vectors_count, casts_.to.e3m2); } + std::size_t get(vector_key_t key, e2m3_t* vector, std::size_t vectors_count = 1) const { return get_(key, vector, vectors_count, casts_.to.e2m3); } + std::size_t get(vector_key_t key, i8_t* vector, std::size_t vectors_count = 1) const { return get_(key, vector, vectors_count, casts_.to.i8); } + std::size_t get(vector_key_t key, u8_t* vector, std::size_t vectors_count = 1) const { return get_(key, vector, vectors_count, casts_.to.u8); } + std::size_t get(vector_key_t key, b1x8_t* vector, std::size_t vectors_count = 1) const { return get_(key, vector, vectors_count, casts_.to.b1x8); } - cluster_result_t cluster(b1x8_t const* vector, std::size_t level, std::size_t thread = any_thread()) const { return cluster_(vector, level, thread, casts_.from.b1x8); } - cluster_result_t cluster(i8_t const* vector, std::size_t 
level, std::size_t thread = any_thread()) const { return cluster_(vector, level, thread, casts_.from.i8); } - cluster_result_t cluster(f16_t const* vector, std::size_t level, std::size_t thread = any_thread()) const { return cluster_(vector, level, thread, casts_.from.f16); } - cluster_result_t cluster(bf16_t const* vector, std::size_t level, std::size_t thread = any_thread()) const { return cluster_(vector, level, thread, casts_.from.bf16); } - cluster_result_t cluster(f32_t const* vector, std::size_t level, std::size_t thread = any_thread()) const { return cluster_(vector, level, thread, casts_.from.f32); } cluster_result_t cluster(f64_t const* vector, std::size_t level, std::size_t thread = any_thread()) const { return cluster_(vector, level, thread, casts_.from.f64); } + cluster_result_t cluster(f32_t const* vector, std::size_t level, std::size_t thread = any_thread()) const { return cluster_(vector, level, thread, casts_.from.f32); } + cluster_result_t cluster(bf16_t const* vector, std::size_t level, std::size_t thread = any_thread()) const { return cluster_(vector, level, thread, casts_.from.bf16); } + cluster_result_t cluster(f16_t const* vector, std::size_t level, std::size_t thread = any_thread()) const { return cluster_(vector, level, thread, casts_.from.f16); } + cluster_result_t cluster(e5m2_t const* vector, std::size_t level, std::size_t thread = any_thread()) const { return cluster_(vector, level, thread, casts_.from.e5m2); } + cluster_result_t cluster(e4m3_t const* vector, std::size_t level, std::size_t thread = any_thread()) const { return cluster_(vector, level, thread, casts_.from.e4m3); } + cluster_result_t cluster(e3m2_t const* vector, std::size_t level, std::size_t thread = any_thread()) const { return cluster_(vector, level, thread, casts_.from.e3m2); } + cluster_result_t cluster(e2m3_t const* vector, std::size_t level, std::size_t thread = any_thread()) const { return cluster_(vector, level, thread, casts_.from.e2m3); } + cluster_result_t 
cluster(i8_t const* vector, std::size_t level, std::size_t thread = any_thread()) const { return cluster_(vector, level, thread, casts_.from.i8); } + cluster_result_t cluster(u8_t const* vector, std::size_t level, std::size_t thread = any_thread()) const { return cluster_(vector, level, thread, casts_.from.u8); } + cluster_result_t cluster(b1x8_t const* vector, std::size_t level, std::size_t thread = any_thread()) const { return cluster_(vector, level, thread, casts_.from.b1x8); } - aggregated_distances_t distance_between(vector_key_t key, b1x8_t const* vector, std::size_t thread = any_thread()) const { return distance_between_(key, vector, thread, casts_.to.b1x8); } - aggregated_distances_t distance_between(vector_key_t key, i8_t const* vector, std::size_t thread = any_thread()) const { return distance_between_(key, vector, thread, casts_.to.i8); } - aggregated_distances_t distance_between(vector_key_t key, f16_t const* vector, std::size_t thread = any_thread()) const { return distance_between_(key, vector, thread, casts_.to.f16); } - aggregated_distances_t distance_between(vector_key_t key, bf16_t const* vector, std::size_t thread = any_thread()) const { return distance_between_(key, vector, thread, casts_.to.bf16); } - aggregated_distances_t distance_between(vector_key_t key, f32_t const* vector, std::size_t thread = any_thread()) const { return distance_between_(key, vector, thread, casts_.to.f32); } aggregated_distances_t distance_between(vector_key_t key, f64_t const* vector, std::size_t thread = any_thread()) const { return distance_between_(key, vector, thread, casts_.to.f64); } + aggregated_distances_t distance_between(vector_key_t key, f32_t const* vector, std::size_t thread = any_thread()) const { return distance_between_(key, vector, thread, casts_.to.f32); } + aggregated_distances_t distance_between(vector_key_t key, bf16_t const* vector, std::size_t thread = any_thread()) const { return distance_between_(key, vector, thread, casts_.to.bf16); } + 
aggregated_distances_t distance_between(vector_key_t key, f16_t const* vector, std::size_t thread = any_thread()) const { return distance_between_(key, vector, thread, casts_.to.f16); } + aggregated_distances_t distance_between(vector_key_t key, e5m2_t const* vector, std::size_t thread = any_thread()) const { return distance_between_(key, vector, thread, casts_.to.e5m2); } + aggregated_distances_t distance_between(vector_key_t key, e4m3_t const* vector, std::size_t thread = any_thread()) const { return distance_between_(key, vector, thread, casts_.to.e4m3); } + aggregated_distances_t distance_between(vector_key_t key, e3m2_t const* vector, std::size_t thread = any_thread()) const { return distance_between_(key, vector, thread, casts_.to.e3m2); } + aggregated_distances_t distance_between(vector_key_t key, e2m3_t const* vector, std::size_t thread = any_thread()) const { return distance_between_(key, vector, thread, casts_.to.e2m3); } + aggregated_distances_t distance_between(vector_key_t key, i8_t const* vector, std::size_t thread = any_thread()) const { return distance_between_(key, vector, thread, casts_.to.i8); } + aggregated_distances_t distance_between(vector_key_t key, u8_t const* vector, std::size_t thread = any_thread()) const { return distance_between_(key, vector, thread, casts_.to.u8); } + aggregated_distances_t distance_between(vector_key_t key, b1x8_t const* vector, std::size_t thread = any_thread()) const { return distance_between_(key, vector, thread, casts_.to.b1x8); } // clang-format on /** @@ -835,8 +893,8 @@ class index_dense_gt { if (a_missing || b_missing) return result; - result.min = std::numeric_limits::max(); - result.max = std::numeric_limits::min(); + result.min = (std::numeric_limits::max)(); + result.max = (std::numeric_limits::min)(); result.mean = 0; result.count = 0; @@ -1923,7 +1981,7 @@ class index_dense_gt { cluster_t& merge_source = clusters[unique_clusters - 1]; std::size_t merge_target_idx = 0; - distance_t merge_distance = 
std::numeric_limits::max(); + distance_t merge_distance = (std::numeric_limits::max)(); for (std::size_t candidate_idx = 0; candidate_idx + 1 < unique_clusters; ++candidate_idx) { distance_t distance = metric_(merge_source.vector, clusters[candidate_idx].vector); @@ -2026,6 +2084,13 @@ class index_dense_gt { // Perform the insertion or the update bool reuse_node = free_slot != default_free_value(); + + byte_t* allocated_vector = nullptr; + if (copy_vector && !reuse_node) { + allocated_vector = vectors_tape_allocator_.allocate(metric_.bytes_per_vector()); + if (!allocated_vector) + return add_result_t{}.failed("Out of memory!"); + } auto on_success = [&](member_ref_t member) { if (config_.enable_key_lookups) { unique_lock_t slot_lock(slot_lookup_mutex_); @@ -2033,7 +2098,7 @@ class index_dense_gt { } if (copy_vector) { if (!reuse_node) - vectors_lookup_[member.slot] = vectors_tape_allocator_.allocate(metric_.bytes_per_vector()); + vectors_lookup_[member.slot] = allocated_vector; std::memcpy(vectors_lookup_[member.slot], vector_data, metric_.bytes_per_vector()); } else vectors_lookup_[member.slot] = (byte_t*)vector_data; @@ -2131,8 +2196,8 @@ class index_dense_gt { if (key_range.first == key_range.second) return result; - result.min = std::numeric_limits::max(); - result.max = std::numeric_limits::min(); + result.min = (std::numeric_limits::max)(); + result.max = (std::numeric_limits::min)(); result.mean = 0; result.count = 0; diff --git a/include/usearch/index_plugins.hpp b/include/usearch/index_plugins.hpp index a17fd2a92..c874fba5b 100644 --- a/include/usearch/index_plugins.hpp +++ b/include/usearch/index_plugins.hpp @@ -10,52 +10,37 @@ #include // `expected_gt` and macros -#if !defined(USEARCH_USE_OPENMP) -#define USEARCH_USE_OPENMP 0 -#endif - -#if USEARCH_USE_OPENMP -#include // `omp_get_num_threads()` -#endif - #if defined(USEARCH_DEFINED_LINUX) #include // `getauxval()` #endif -#if !defined(USEARCH_USE_FP16LIB) -#if defined(__AVX512F__) -#define 
USEARCH_USE_FP16LIB 0 -#elif defined(USEARCH_DEFINED_ARM) -#include // `__fp16` -#define USEARCH_USE_FP16LIB 0 -#else -#define USEARCH_USE_FP16LIB 1 -#endif +#if !defined(USEARCH_USE_OPENMP) +#define USEARCH_USE_OPENMP 0 #endif -#if USEARCH_USE_FP16LIB -#include +#if USEARCH_USE_OPENMP +#include // `omp_get_num_threads()` #endif -#if !defined(USEARCH_USE_SIMSIMD) -#define USEARCH_USE_SIMSIMD 0 +#if !defined(USEARCH_USE_NUMKONG) +#define USEARCH_USE_NUMKONG 0 #endif -#if USEARCH_USE_SIMSIMD +#if USEARCH_USE_NUMKONG // Propagate the `f16` settings #if defined(USEARCH_CAN_COMPILE_FP16) || defined(USEARCH_CAN_COMPILE_FLOAT16) #if USEARCH_CAN_COMPILE_FP16 || USEARCH_CAN_COMPILE_FLOAT16 -#define SIMSIMD_NATIVE_F16 1 +#define NK_NATIVE_F16 1 #else -#define SIMSIMD_NATIVE_F16 0 +#define NK_NATIVE_F16 0 #endif #endif // Propagate the `bf16` settings #if defined(USEARCH_CAN_COMPILE_BF16) || defined(USEARCH_CAN_COMPILE_BFLOAT16) #if USEARCH_CAN_COMPILE_BF16 || USEARCH_CAN_COMPILE_BFLOAT16 -#define SIMSIMD_NATIVE_BF16 1 +#define NK_NATIVE_BF16 1 #else -#define SIMSIMD_NATIVE_BF16 0 +#define NK_NATIVE_BF16 0 #endif #endif // No problem, if some of the functions are unused or undefined @@ -71,7 +56,7 @@ #pragma warning(disable : 4101) // "Unused variables" #pragma warning(disable : 4068) // "Unknown pragmas", when MSVC tries to read GCC pragmas #endif // _MSC_VER -#include +#include #ifdef _MSC_VER #pragma warning(pop) #endif // _MSC_VER @@ -88,11 +73,19 @@ struct uuid_t { std::uint8_t octets[16]; }; -class f16_bits_t; class bf16_bits_t; +class f16_bits_t; +class e5m2_bits_t; +class e4m3_bits_t; +class e3m2_bits_t; +class e2m3_bits_t; -using f16_t = f16_bits_t; using bf16_t = bf16_bits_t; +using f16_t = f16_bits_t; +using e5m2_t = e5m2_bits_t; +using e4m3_t = e4m3_bits_t; +using e3m2_t = e3m2_bits_t; +using e2m3_t = e2m3_bits_t; using f64_t = double; using f32_t = float; @@ -107,6 +100,14 @@ using i32_t = std::int32_t; using i16_t = std::int16_t; using i8_t = std::int8_t; +/** + 
* @brief Reinterpret-cast between float and uint32 without UB on most compilers. + */ +union fu32_t { + float f; + std::uint32_t u; +}; + /** * @brief Enumerates the most commonly used distance metrics, mostly for dense vector representations. */ @@ -142,11 +143,15 @@ enum class scalar_kind_t : std::uint8_t { u40_k = 2, uuid_k = 3, bf16_k = 4, + // Mini-floats: + e5m2_k = 5, ///< FP8 IEEE 754: 1 sign + 5 exponent + 2 mantissa, range +/-57344 + e4m3_k = 6, ///< FP8 OCP: 1 sign + 4 exponent + 3 mantissa, range +/-448 + e3m2_k = 8, ///< FP6: 1 sign + 3 exponent + 2 mantissa, range +/-28 + e2m3_k = 7, ///< FP6: 1 sign + 2 exponent + 3 mantissa, range +/-7.5 // Common: f64_k = 10, f32_k = 11, f16_k = 12, - f8_k = 13, // Common Integral: u64_k = 14, u32_k = 15, @@ -176,6 +181,14 @@ template scalar_kind_t scalar_kind() noexcept { return scalar_kind_t::f16_k; if (std::is_same()) return scalar_kind_t::bf16_k; + if (std::is_same()) + return scalar_kind_t::e5m2_k; + if (std::is_same()) + return scalar_kind_t::e4m3_k; + if (std::is_same()) + return scalar_kind_t::e3m2_k; + if (std::is_same()) + return scalar_kind_t::e2m3_k; if (std::is_same()) return scalar_kind_t::i8_k; if (std::is_same()) @@ -251,7 +264,10 @@ inline std::size_t bits_per_scalar(scalar_kind_t scalar_kind) noexcept { case scalar_kind_t::f16_k: return 16; case scalar_kind_t::u8_k: return 8; case scalar_kind_t::i8_k: return 8; - case scalar_kind_t::f8_k: return 8; + case scalar_kind_t::e5m2_k: return 8; + case scalar_kind_t::e4m3_k: return 8; + case scalar_kind_t::e2m3_k: return 8; + case scalar_kind_t::e3m2_k: return 8; default: return 0; } } @@ -277,7 +293,10 @@ inline std::size_t bits_per_scalar_word(scalar_kind_t scalar_kind) noexcept { case scalar_kind_t::f16_k: return 16; case scalar_kind_t::u8_k: return 8; case scalar_kind_t::i8_k: return 8; - case scalar_kind_t::f8_k: return 8; + case scalar_kind_t::e5m2_k: return 8; + case scalar_kind_t::e4m3_k: return 8; + case scalar_kind_t::e2m3_k: return 8; + case 
scalar_kind_t::e3m2_k: return 8; default: return 0; } } @@ -302,7 +321,10 @@ inline char const* scalar_kind_name(scalar_kind_t scalar_kind) noexcept { case scalar_kind_t::f16_k: return "f16"; case scalar_kind_t::u8_k: return "u8"; case scalar_kind_t::i8_k: return "i8"; - case scalar_kind_t::f8_k: return "f8"; + case scalar_kind_t::e5m2_k: return "e5m2"; + case scalar_kind_t::e4m3_k: return "e4m3"; + case scalar_kind_t::e2m3_k: return "e2m3"; + case scalar_kind_t::e3m2_k: return "e3m2"; default: return ""; } } @@ -332,20 +354,30 @@ inline char const* metric_kind_name(metric_kind_t metric) noexcept { */ inline expected_gt scalar_kind_from_name(char const* name, std::size_t len) { expected_gt parsed; - if (str_equals(name, len, "f32")) - parsed.result = scalar_kind_t::f32_k; - else if (str_equals(name, len, "f64")) + if (str_equals(name, len, "f64")) parsed.result = scalar_kind_t::f64_k; - else if (str_equals(name, len, "f16")) - parsed.result = scalar_kind_t::f16_k; + else if (str_equals(name, len, "f32")) + parsed.result = scalar_kind_t::f32_k; else if (str_equals(name, len, "bf16")) parsed.result = scalar_kind_t::bf16_k; + else if (str_equals(name, len, "f16")) + parsed.result = scalar_kind_t::f16_k; + else if (str_equals(name, len, "e5m2")) + parsed.result = scalar_kind_t::e5m2_k; + else if (str_equals(name, len, "e4m3")) + parsed.result = scalar_kind_t::e4m3_k; + else if (str_equals(name, len, "e3m2")) + parsed.result = scalar_kind_t::e3m2_k; + else if (str_equals(name, len, "e2m3")) + parsed.result = scalar_kind_t::e2m3_k; else if (str_equals(name, len, "i8")) parsed.result = scalar_kind_t::i8_k; + else if (str_equals(name, len, "u8")) + parsed.result = scalar_kind_t::u8_k; else if (str_equals(name, len, "b1")) parsed.result = scalar_kind_t::b1x8_k; else - parsed.failed("Unknown type, choose: f64, f32, f16, bf16, i8, b1"); + parsed.failed("Unknown type, choose: f64, f32, bf16, f16, e5m2, e4m3, e3m2, e2m3, i8, u8, b1"); return parsed; } @@ -396,15 +428,30 @@ 
inline expected_gt metric_from_name(char const* name) { * @brief Convenience function to upcast a half-precision floating point number to a single-precision one. */ inline float f16_to_f32(std::uint16_t u16) noexcept { -#if USEARCH_USE_FP16LIB - return fp16_ieee_to_fp32_value(u16); -#elif USEARCH_USE_SIMSIMD - return simsimd_f16_to_f32((simsimd_f16_t const*)&u16); +#if USEARCH_USE_NUMKONG + nk_f32_t result; + nk_f16_to_f32_serial((nk_f16_t const*)&u16, &result); + return result; #else -#warning "It's recommended to use SimSIMD and fp16lib for half-precision numerics" - _Float16 f16; - std::memcpy(&f16, &u16, sizeof(std::uint16_t)); - return float(f16); + std::uint32_t sign = (u16 >> 15) & 1; + std::uint32_t exponent = (u16 >> 10) & 0x1F; + std::uint32_t mantissa = u16 & 0x03FF; + fu32_t conv; + if (exponent == 0) { + if (mantissa == 0) { + conv.u = sign << 31; + } else { + // Denormal: use FPU normalization trick + fu32_t temp; + temp.f = (float)mantissa; + conv.u = (sign << 31) | (temp.u - 0x0C000000u); + } + } else if (exponent == 31) { + conv.u = (sign << 31) | 0x7F800000u | (mantissa << 13); + } else { + conv.u = (sign << 31) | ((exponent + 112u) << 23) | (mantissa << 13); + } + return conv.f; #endif } @@ -412,28 +459,75 @@ inline float f16_to_f32(std::uint16_t u16) noexcept { * @brief Convenience function to downcast a single-precision floating point number to a half-precision one. 
*/ inline std::uint16_t f32_to_f16(float f32) noexcept { -#if USEARCH_USE_FP16LIB - return fp16_ieee_from_fp32_value(f32); -#elif USEARCH_USE_SIMSIMD - std::uint16_t result; - simsimd_f32_to_f16(f32, (simsimd_f16_t*)&result); - return result; -#else -#warning "It's recommended to use SimSIMD and fp16lib for half-precision numerics" - _Float16 f16 = _Float16(f32); +#if USEARCH_USE_NUMKONG + nk_f16_t result; + nk_f32_to_f16_serial((nk_f32_t const*)&f32, &result); std::uint16_t u16; - std::memcpy(&u16, &f16, sizeof(std::uint16_t)); + std::memcpy(&u16, &result, sizeof(u16)); return u16; +#else + fu32_t conv; + conv.f = f32; + std::uint32_t sign = (conv.u >> 31) & 1; + std::uint32_t exponent = (conv.u >> 23) & 0xFF; + std::uint32_t mantissa = conv.u & 0x007FFFFFu; + std::uint16_t result; + if (exponent == 0) { + result = (std::uint16_t)(sign << 15); + } else if (exponent == 255) { + std::uint16_t payload = (std::uint16_t)(mantissa >> 13); + if (mantissa != 0 && payload == 0) + payload = 1; + result = (std::uint16_t)((sign << 15) | 0x7C00 | payload); + } else if (exponent <= 102) { + if (exponent == 102 && mantissa > 0) + result = (std::uint16_t)((sign << 15) | 0x0001); + else + result = (std::uint16_t)(sign << 15); + } else if (exponent < 113) { + // Denormal range with RNE rounding + unsigned shift = 113 - exponent; + unsigned shift_amount = shift + 13; + std::uint64_t full_mant = 0x00800000ULL | mantissa; + std::uint32_t mant = (std::uint32_t)(full_mant >> shift_amount); + std::uint32_t round_bit = (std::uint32_t)((full_mant >> (shift_amount - 1)) & 1); + std::uint64_t sticky_bits = full_mant & ((1ULL << (shift_amount - 1)) - 1); + if (round_bit && (sticky_bits || (mant & 1))) + mant++; + result = (std::uint16_t)((sign << 15) | mant); + } else if (exponent < 143) { + // Normal range with RNE rounding + std::uint32_t f16_exp = exponent - 112; + std::uint32_t f16_mant = mantissa >> 13; + std::uint32_t round_bit = (mantissa >> 12) & 1; + std::uint32_t sticky_bits = 
mantissa & 0xFFF; + if (round_bit && (sticky_bits || (f16_mant & 1))) { + f16_mant++; + if (f16_mant > 0x3FF) { + f16_mant = 0; + f16_exp++; + } + } + if (f16_exp > 30) + result = (std::uint16_t)((sign << 15) | 0x7C00); + else + result = (std::uint16_t)((sign << 15) | (f16_exp << 10) | f16_mant); + } else { + result = (std::uint16_t)((sign << 15) | 0x7C00); + } + return result; #endif } /** * @brief Convenience function to upcast a brain-floating point number to a single-precision one. - * https://github.com/ashvardanian/SimSIMD/blob/ff51434d90c66f916e94ff05b24530b127aa4cff/include/simsimd/types.h#L395-L410 + * https://github.com/ashvardanian/NumKong/blob/7e58e9fee9e096238cf29f7c30774fa3dcd0fe85/include/numkong/cast/serial.h#L226-L244 */ inline float bf16_to_f32(std::uint16_t u16) noexcept { -#if USEARCH_USE_SIMSIMD - return simsimd_bf16_to_f32((simsimd_bf16_t const*)&u16); +#if USEARCH_USE_NUMKONG + nk_f32_t result; + nk_bf16_to_f32_serial((nk_bf16_t const*)&u16, &result); + return result; #else union float_or_unsigned_int_t { float f; @@ -446,13 +540,15 @@ inline float bf16_to_f32(std::uint16_t u16) noexcept { /** * @brief Convenience function to downcast a single-precision floating point number to a brain-floating point one. 
- * https://github.com/ashvardanian/SimSIMD/blob/ff51434d90c66f916e94ff05b24530b127aa4cff/include/simsimd/types.h#L412-L425 + * https://github.com/ashvardanian/NumKong/blob/7e58e9fee9e096238cf29f7c30774fa3dcd0fe85/include/numkong/cast/serial.h#L244-L262 */ inline std::uint16_t f32_to_bf16(float f32) noexcept { -#if USEARCH_USE_SIMSIMD - std::uint16_t result; - simsimd_f32_to_bf16(f32, (simsimd_bf16_t*)&result); - return result; +#if USEARCH_USE_NUMKONG + nk_bf16_t result; + nk_f32_to_bf16_serial((nk_f32_t const*)&f32, &result); + std::uint16_t u16; + std::memcpy(&u16, &result, sizeof(u16)); + return u16; #else union float_or_unsigned_int_t { float f; @@ -598,6 +694,656 @@ class bf16_bits_t { initializer(omp_priv = unum::usearch::bf16_bits_t()) #endif +/** + * @brief Convenience function to upcast an FP8 E5M2 value to single-precision. + * E5M2: 1 sign + 5 exponent (bias=15) + 2 mantissa, range +/-57344, supports inf/NaN. + */ +inline float e5m2_to_f32(std::uint8_t u8) noexcept { +#if USEARCH_USE_NUMKONG + nk_f32_t result; + nk_e5m2_to_f32_serial((nk_e5m2_t const*)&u8, &result); + return result; +#else + // 128-entry LUT for the 7-bit magnitude, sign handled separately. 
+ static std::uint32_t const lut[128] = { + 0x00000000, 0x37800000, 0x38000000, 0x38400000, // exp=0 sub + 0x38800000, 0x38A00000, 0x38C00000, 0x38E00000, // exp=1 + 0x39000000, 0x39200000, 0x39400000, 0x39600000, // exp=2 + 0x39800000, 0x39A00000, 0x39C00000, 0x39E00000, // exp=3 + 0x3A000000, 0x3A200000, 0x3A400000, 0x3A600000, // exp=4 + 0x3A800000, 0x3AA00000, 0x3AC00000, 0x3AE00000, // exp=5 + 0x3B000000, 0x3B200000, 0x3B400000, 0x3B600000, // exp=6 + 0x3B800000, 0x3BA00000, 0x3BC00000, 0x3BE00000, // exp=7 + 0x3C000000, 0x3C200000, 0x3C400000, 0x3C600000, // exp=8 + 0x3C800000, 0x3CA00000, 0x3CC00000, 0x3CE00000, // exp=9 + 0x3D000000, 0x3D200000, 0x3D400000, 0x3D600000, // exp=10 + 0x3D800000, 0x3DA00000, 0x3DC00000, 0x3DE00000, // exp=11 + 0x3E000000, 0x3E200000, 0x3E400000, 0x3E600000, // exp=12 + 0x3E800000, 0x3EA00000, 0x3EC00000, 0x3EE00000, // exp=13 + 0x3F000000, 0x3F200000, 0x3F400000, 0x3F600000, // exp=14 + 0x3F800000, 0x3FA00000, 0x3FC00000, 0x3FE00000, // exp=15 + 0x40000000, 0x40200000, 0x40400000, 0x40600000, // exp=16 + 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, // exp=17 + 0x41000000, 0x41200000, 0x41400000, 0x41600000, // exp=18 + 0x41800000, 0x41A00000, 0x41C00000, 0x41E00000, // exp=19 + 0x42000000, 0x42200000, 0x42400000, 0x42600000, // exp=20 + 0x42800000, 0x42A00000, 0x42C00000, 0x42E00000, // exp=21 + 0x43000000, 0x43200000, 0x43400000, 0x43600000, // exp=22 + 0x43800000, 0x43A00000, 0x43C00000, 0x43E00000, // exp=23 + 0x44000000, 0x44200000, 0x44400000, 0x44600000, // exp=24 + 0x44800000, 0x44A00000, 0x44C00000, 0x44E00000, // exp=25 + 0x45000000, 0x45200000, 0x45400000, 0x45600000, // exp=26 + 0x45800000, 0x45A00000, 0x45C00000, 0x45E00000, // exp=27 + 0x46000000, 0x46200000, 0x46400000, 0x46600000, // exp=28 + 0x46800000, 0x46A00000, 0x46C00000, 0x46E00000, // exp=29 + 0x47000000, 0x47200000, 0x47400000, 0x47600000, // exp=30 + 0x7F800000, 0x7FC00000, 0x7FC00000, 0x7FC00000, // inf, nan + }; + std::uint32_t sign = 
(std::uint32_t)(u8 & 0x80) << 24; + fu32_t conv; + conv.u = sign | lut[u8 & 0x7F]; + return conv.f; +#endif +} + +/** + * @brief Convenience function to downcast a single-precision value to FP8 E5M2. + * Uses RNE rounding. Overflow โ†’ inf, NaN โ†’ NaN, subnormals handled. + */ +inline std::uint8_t f32_to_e5m2(float f32) noexcept { +#if USEARCH_USE_NUMKONG + nk_e5m2_t result; + nk_f32_to_e5m2_serial((nk_f32_t const*)&f32, &result); + return result; +#else + fu32_t conv; + conv.f = f32; + std::uint32_t sign_bit = conv.u >> 31; + std::uint32_t abs_bits = conv.u & 0x7FFFFFFFu; + std::uint8_t sign = (std::uint8_t)(sign_bit << 7); + + // NaN or inf + if (abs_bits >= 0x7F800000u) { + std::uint8_t mant = (abs_bits > 0x7F800000u) ? 0x01u : 0x00u; + return (std::uint8_t)(sign | 0x7Cu | mant); + } + if (abs_bits == 0) + return sign; + + float abs_x = sign_bit ? -f32 : f32; + + // Subnormal range: |x| < 2^-14 + if (abs_x < (1.0f / 16384.0f)) { + float scaled = abs_x * 65536.0f; + int mant = (int)scaled; + float frac = scaled - (float)mant; + if (frac > 0.5f || (frac == 0.5f && (mant & 1))) + ++mant; + if (mant > 3) + return (std::uint8_t)(sign | 0x04u); + return (std::uint8_t)(sign | (std::uint8_t)mant); + } + + int exp = (int)((abs_bits >> 23) & 0xFFu) - 127; + std::uint32_t mantissa = abs_bits & 0x7FFFFFu; + std::uint32_t significand = (1u << 23) | mantissa; + int shift = 23 - 2; + std::uint32_t remainder_mask = (1u << shift) - 1; + std::uint32_t remainder = significand & remainder_mask; + std::uint32_t halfway = 1u << (shift - 1); + std::uint32_t significand_rounded = significand >> shift; + if (remainder > halfway || (remainder == halfway && (significand_rounded & 1))) + ++significand_rounded; + if (significand_rounded == (1u << 3)) { + significand_rounded >>= 1; + ++exp; + } + if (exp > 15) + return (std::uint8_t)(sign | 0x7Cu); // overflow โ†’ inf + if (exp < -14) { + float scaled = abs_x * 65536.0f; + int mant = (int)scaled; + float frac = scaled - (float)mant; + if 
(frac > 0.5f || (frac == 0.5f && (mant & 1))) + ++mant; + if (mant > 3) + return (std::uint8_t)(sign | 0x04u); + return (std::uint8_t)(sign | (std::uint8_t)mant); + } + + std::uint8_t exp_field = (std::uint8_t)(exp + 15); + std::uint8_t mant_field = (std::uint8_t)(significand_rounded & 0x03u); + return (std::uint8_t)(sign | (exp_field << 2) | mant_field); +#endif +} + +/** + * @brief Convenience function to upcast an FP8 E4M3 value to single-precision. + * E4M3: 1 sign + 4 exponent (bias=7) + 3 mantissa, range +/-448, no inf. + */ +inline float e4m3_to_f32(std::uint8_t u8) noexcept { +#if USEARCH_USE_NUMKONG + nk_f32_t result; + nk_e4m3_to_f32_serial((nk_e4m3_t const*)&u8, &result); + return result; +#else + static std::uint32_t const lut[128] = { + 0x00000000, 0x3B000000, 0x3B800000, 0x3BC00000, 0x3C000000, 0x3C200000, 0x3C400000, 0x3C600000, // exp=0 sub + 0x3C800000, 0x3C900000, 0x3CA00000, 0x3CB00000, 0x3CC00000, 0x3CD00000, 0x3CE00000, 0x3CF00000, // exp=1 + 0x3D000000, 0x3D100000, 0x3D200000, 0x3D300000, 0x3D400000, 0x3D500000, 0x3D600000, 0x3D700000, // exp=2 + 0x3D800000, 0x3D900000, 0x3DA00000, 0x3DB00000, 0x3DC00000, 0x3DD00000, 0x3DE00000, 0x3DF00000, // exp=3 + 0x3E000000, 0x3E100000, 0x3E200000, 0x3E300000, 0x3E400000, 0x3E500000, 0x3E600000, 0x3E700000, // exp=4 + 0x3E800000, 0x3E900000, 0x3EA00000, 0x3EB00000, 0x3EC00000, 0x3ED00000, 0x3EE00000, 0x3EF00000, // exp=5 + 0x3F000000, 0x3F100000, 0x3F200000, 0x3F300000, 0x3F400000, 0x3F500000, 0x3F600000, 0x3F700000, // exp=6 + 0x3F800000, 0x3F900000, 0x3FA00000, 0x3FB00000, 0x3FC00000, 0x3FD00000, 0x3FE00000, 0x3FF00000, // exp=7 + 0x40000000, 0x40100000, 0x40200000, 0x40300000, 0x40400000, 0x40500000, 0x40600000, 0x40700000, // exp=8 + 0x40800000, 0x40900000, 0x40A00000, 0x40B00000, 0x40C00000, 0x40D00000, 0x40E00000, 0x40F00000, // exp=9 + 0x41000000, 0x41100000, 0x41200000, 0x41300000, 0x41400000, 0x41500000, 0x41600000, 0x41700000, // exp=10 + 0x41800000, 0x41900000, 0x41A00000, 0x41B00000, 
0x41C00000, 0x41D00000, 0x41E00000, 0x41F00000, // exp=11 + 0x42000000, 0x42100000, 0x42200000, 0x42300000, 0x42400000, 0x42500000, 0x42600000, 0x42700000, // exp=12 + 0x42800000, 0x42900000, 0x42A00000, 0x42B00000, 0x42C00000, 0x42D00000, 0x42E00000, 0x42F00000, // exp=13 + 0x43000000, 0x43100000, 0x43200000, 0x43300000, 0x43400000, 0x43500000, 0x43600000, 0x43700000, // exp=14 + 0x43800000, 0x43900000, 0x43A00000, 0x43B00000, 0x43C00000, 0x43D00000, 0x43E00000, 0x7FC00000, // exp=15 + }; + std::uint32_t sign = (std::uint32_t)(u8 & 0x80) << 24; + fu32_t conv; + conv.u = sign | lut[u8 & 0x7F]; + return conv.f; +#endif +} + +/** + * @brief Convenience function to downcast a single-precision value to FP8 E4M3. + * Uses RNE rounding. Overflow saturates to +/-448 (no inf in E4M3FN). + */ +inline std::uint8_t f32_to_e4m3(float f32) noexcept { +#if USEARCH_USE_NUMKONG + nk_e4m3_t result; + nk_f32_to_e4m3_serial((nk_f32_t const*)&f32, &result); + return result; +#else + fu32_t conv; + conv.f = f32; + std::uint32_t sign_bit = conv.u >> 31; + std::uint32_t abs_bits = conv.u & 0x7FFFFFFFu; + std::uint8_t sign = (std::uint8_t)(sign_bit << 7); + + // NaN โ†’ E4M3FN NaN + if (abs_bits > 0x7F800000u) + return (std::uint8_t)(sign | 0x7Fu); + // Inf โ†’ saturate to max (448) + if (abs_bits == 0x7F800000u) + return (std::uint8_t)(sign | 0x7Eu); + if (abs_bits == 0) + return sign; + + float abs_x = sign_bit ? 
-f32 : f32; + + // Subnormal range: |x| < 2^-6 + if (abs_x < (1.0f / 64.0f)) { + float scaled = abs_x * 512.0f; + int mant = (int)scaled; + float frac = scaled - (float)mant; + if (frac > 0.5f || (frac == 0.5f && (mant & 1))) + ++mant; + if (mant > 7) + return (std::uint8_t)(sign | 0x08u); + return (std::uint8_t)(sign | (std::uint8_t)mant); + } + + int exp = (int)((abs_bits >> 23) & 0xFFu) - 127; + std::uint32_t mantissa = abs_bits & 0x7FFFFFu; + std::uint32_t significand = (1u << 23) | mantissa; + int shift = 23 - 3; + std::uint32_t remainder_mask = (1u << shift) - 1; + std::uint32_t remainder = significand & remainder_mask; + std::uint32_t halfway = 1u << (shift - 1); + std::uint32_t significand_rounded = significand >> shift; + if (remainder > halfway || (remainder == halfway && (significand_rounded & 1))) + ++significand_rounded; + if (significand_rounded == (1u << 4)) { + significand_rounded >>= 1; + ++exp; + } + // Overflow โ†’ saturate to max (0x7E = 448) + if (exp > 8) + return (std::uint8_t)(sign | 0x7Eu); + if (exp < -6) { + float scaled = abs_x * 512.0f; + int mant = (int)scaled; + float frac = scaled - (float)mant; + if (frac > 0.5f || (frac == 0.5f && (mant & 1))) + ++mant; + if (mant > 7) + return (std::uint8_t)(sign | 0x08u); + return (std::uint8_t)(sign | (std::uint8_t)mant); + } + + std::uint8_t exp_field = (std::uint8_t)(exp + 7); + std::uint8_t mant_field = (std::uint8_t)(significand_rounded & 0x07u); + // Clamp to avoid NaN encoding (0x7F) + if (exp_field == 15 && mant_field > 6) + mant_field = 6; + return (std::uint8_t)(sign | (exp_field << 3) | mant_field); +#endif +} + +/** + * @brief Numeric type for FP8 E5M2 (IEEE 754-like) floating point. + * 1 sign + 5 exponent + 2 mantissa bits, range +/-57344. 
+ */ +class e5m2_bits_t { + std::uint8_t uint8_{}; + + public: + inline e5m2_bits_t() noexcept : uint8_(0) {} + inline e5m2_bits_t(e5m2_bits_t&&) = default; + inline e5m2_bits_t& operator=(e5m2_bits_t&&) = default; + inline e5m2_bits_t(e5m2_bits_t const&) = default; + inline e5m2_bits_t& operator=(e5m2_bits_t const&) = default; + + inline operator float() const noexcept { return e5m2_to_f32(uint8_); } + inline explicit operator bool() const noexcept { return e5m2_to_f32(uint8_) > 0.5f; } + + inline e5m2_bits_t(int v) noexcept : uint8_(f32_to_e5m2(static_cast(v))) {} + inline e5m2_bits_t(bool v) noexcept : uint8_(f32_to_e5m2(static_cast(v))) {} + inline e5m2_bits_t(float v) noexcept : uint8_(f32_to_e5m2(v)) {} + inline e5m2_bits_t(double v) noexcept : uint8_(f32_to_e5m2(static_cast(v))) {} + + inline bool operator<(e5m2_bits_t const& other) const noexcept { return float(*this) < float(other); } + + inline e5m2_bits_t operator+(e5m2_bits_t other) const noexcept { return {float(*this) + float(other)}; } + inline e5m2_bits_t operator-(e5m2_bits_t other) const noexcept { return {float(*this) - float(other)}; } + inline e5m2_bits_t operator*(e5m2_bits_t other) const noexcept { return {float(*this) * float(other)}; } + inline e5m2_bits_t operator/(e5m2_bits_t other) const noexcept { return {float(*this) / float(other)}; } + inline float operator+(float other) const noexcept { return float(*this) + other; } + inline float operator-(float other) const noexcept { return float(*this) - other; } + inline float operator*(float other) const noexcept { return float(*this) * other; } + inline float operator/(float other) const noexcept { return float(*this) / other; } + inline double operator+(double other) const noexcept { return float(*this) + other; } + inline double operator-(double other) const noexcept { return float(*this) - other; } + inline double operator*(double other) const noexcept { return float(*this) * other; } + inline double operator/(double other) const noexcept 
{ return float(*this) / other; } + + inline e5m2_bits_t& operator+=(float v) noexcept { + uint8_ = f32_to_e5m2(v + e5m2_to_f32(uint8_)); + return *this; + } + inline e5m2_bits_t& operator-=(float v) noexcept { + uint8_ = f32_to_e5m2(v - e5m2_to_f32(uint8_)); + return *this; + } + inline e5m2_bits_t& operator*=(float v) noexcept { + uint8_ = f32_to_e5m2(v * e5m2_to_f32(uint8_)); + return *this; + } + inline e5m2_bits_t& operator/=(float v) noexcept { + uint8_ = f32_to_e5m2(v / e5m2_to_f32(uint8_)); + return *this; + } + inline e5m2_bits_t& operator=(float v) noexcept { + uint8_ = f32_to_e5m2(v); + return *this; + } +}; + +/** + * @brief Numeric type for FP8 E4M3 (OCP) floating point. + * 1 sign + 4 exponent + 3 mantissa bits, range +/-448. + */ +class e4m3_bits_t { + std::uint8_t uint8_{}; + + public: + inline e4m3_bits_t() noexcept : uint8_(0) {} + inline e4m3_bits_t(e4m3_bits_t&&) = default; + inline e4m3_bits_t& operator=(e4m3_bits_t&&) = default; + inline e4m3_bits_t(e4m3_bits_t const&) = default; + inline e4m3_bits_t& operator=(e4m3_bits_t const&) = default; + + inline operator float() const noexcept { return e4m3_to_f32(uint8_); } + inline explicit operator bool() const noexcept { return e4m3_to_f32(uint8_) > 0.5f; } + + inline e4m3_bits_t(int v) noexcept : uint8_(f32_to_e4m3(static_cast(v))) {} + inline e4m3_bits_t(bool v) noexcept : uint8_(f32_to_e4m3(static_cast(v))) {} + inline e4m3_bits_t(float v) noexcept : uint8_(f32_to_e4m3(v)) {} + inline e4m3_bits_t(double v) noexcept : uint8_(f32_to_e4m3(static_cast(v))) {} + + inline bool operator<(e4m3_bits_t const& other) const noexcept { return float(*this) < float(other); } + + inline e4m3_bits_t operator+(e4m3_bits_t other) const noexcept { return {float(*this) + float(other)}; } + inline e4m3_bits_t operator-(e4m3_bits_t other) const noexcept { return {float(*this) - float(other)}; } + inline e4m3_bits_t operator*(e4m3_bits_t other) const noexcept { return {float(*this) * float(other)}; } + inline e4m3_bits_t 
operator/(e4m3_bits_t other) const noexcept { return {float(*this) / float(other)}; } + inline float operator+(float other) const noexcept { return float(*this) + other; } + inline float operator-(float other) const noexcept { return float(*this) - other; } + inline float operator*(float other) const noexcept { return float(*this) * other; } + inline float operator/(float other) const noexcept { return float(*this) / other; } + inline double operator+(double other) const noexcept { return float(*this) + other; } + inline double operator-(double other) const noexcept { return float(*this) - other; } + inline double operator*(double other) const noexcept { return float(*this) * other; } + inline double operator/(double other) const noexcept { return float(*this) / other; } + + inline e4m3_bits_t& operator+=(float v) noexcept { + uint8_ = f32_to_e4m3(v + e4m3_to_f32(uint8_)); + return *this; + } + inline e4m3_bits_t& operator-=(float v) noexcept { + uint8_ = f32_to_e4m3(v - e4m3_to_f32(uint8_)); + return *this; + } + inline e4m3_bits_t& operator*=(float v) noexcept { + uint8_ = f32_to_e4m3(v * e4m3_to_f32(uint8_)); + return *this; + } + inline e4m3_bits_t& operator/=(float v) noexcept { + uint8_ = f32_to_e4m3(v / e4m3_to_f32(uint8_)); + return *this; + } + inline e4m3_bits_t& operator=(float v) noexcept { + uint8_ = f32_to_e4m3(v); + return *this; + } +}; + +/** + * @brief Convenience function to upcast an FP6 E2M3 value to single-precision. + * E2M3: 1 sign + 2 exponent (bias=1) + 3 mantissa, stored as 0b00SEEMMM, range +/-7.5. 
+ */ +inline float e2m3_to_f32(std::uint8_t u8) noexcept { +#if USEARCH_USE_NUMKONG + nk_f32_t result; + nk_e2m3_to_f32_serial((nk_e2m3_t const*)&u8, &result); + return result; +#else + static std::uint32_t const lut[64] = { + 0x00000000, 0x3E000000, 0x3E800000, 0x3EC00000, 0x3F000000, 0x3F200000, 0x3F400000, 0x3F600000, // positive + 0x3F800000, 0x3F900000, 0x3FA00000, 0x3FB00000, 0x3FC00000, 0x3FD00000, 0x3FE00000, 0x3FF00000, // positive + 0x40000000, 0x40100000, 0x40200000, 0x40300000, 0x40400000, 0x40500000, 0x40600000, 0x40700000, // positive + 0x40800000, 0x40900000, 0x40A00000, 0x40B00000, 0x40C00000, 0x40D00000, 0x40E00000, 0x40F00000, // positive + 0x80000000, 0xBE000000, 0xBE800000, 0xBEC00000, 0xBF000000, 0xBF200000, 0xBF400000, 0xBF600000, // negative + 0xBF800000, 0xBF900000, 0xBFA00000, 0xBFB00000, 0xBFC00000, 0xBFD00000, 0xBFE00000, 0xBFF00000, // negative + 0xC0000000, 0xC0100000, 0xC0200000, 0xC0300000, 0xC0400000, 0xC0500000, 0xC0600000, 0xC0700000, // negative + 0xC0800000, 0xC0900000, 0xC0A00000, 0xC0B00000, 0xC0C00000, 0xC0D00000, 0xC0E00000, 0xC0F00000, // negative + }; + fu32_t conv; + conv.u = lut[u8 & 0x3F]; + return conv.f; +#endif +} + +/** + * @brief Convenience function to downcast a single-precision value to FP6 E2M3. + */ +inline std::uint8_t f32_to_e2m3(float f32) noexcept { +#if USEARCH_USE_NUMKONG + nk_e2m3_t result; + nk_f32_to_e2m3_serial((nk_f32_t const*)&f32, &result); + return result; +#else + fu32_t conv; + conv.f = f32; + std::uint32_t sign_bit = conv.u >> 31; + std::uint32_t abs_bits = conv.u & 0x7FFFFFFFu; + std::uint8_t sign = (std::uint8_t)(sign_bit << 5); + if (abs_bits == 0) + return sign; + float abs_x = sign_bit ? 
-f32 : f32; + // E2M3: bias=1, 2 exp bits, 3 mant bits, max normal = 7.5, min subnormal = 0.125 + if (abs_x >= 7.5f) + return (std::uint8_t)(sign | 0x1Fu); // saturate to max + if (abs_x < 0.0625f) + return sign; // underflow to zero + // Subnormal range: abs_x < 1.0 (min normal = 2^(1-1) = 1.0) + if (abs_x < 1.0f) { + // Subnormals: value = mant * 2^(-3) = mant/8, so mant = round(abs_x * 8) + std::uint32_t mant = (std::uint32_t)(abs_x * 8.0f + 0.5f); + if (mant > 7) + mant = 7; + if (mant == 0) + return sign; + return (std::uint8_t)(sign | mant); + } + // Normal range: value = (1 + mant/8) * 2^(exp-1) + // Find exponent: exp-1 = floor(log2(abs_x)), so exp = floor(log2(abs_x)) + 1 + int exp_val; + float frac = std::frexp(abs_x, &exp_val); // abs_x = frac * 2^exp_val, frac in [0.5, 1) + // frexp returns frac in [0.5, 1), but we want significand in [1, 2) + // so significand = frac * 2, and true_exp = exp_val - 1 + float significand = frac * 2.0f; // [1.0, 2.0) + int biased_exp = exp_val; // exp_val - 1 + bias(1) = exp_val + if (biased_exp < 1) { + // Fell into subnormal, handled above + std::uint32_t mant = (std::uint32_t)(abs_x * 8.0f + 0.5f); + if (mant > 7) + mant = 7; + return (std::uint8_t)(sign | mant); + } + if (biased_exp > 3) + biased_exp = 3; // clamp to max exp + // Round mantissa: 3 mant bits, significand in [1, 2) + std::uint32_t mant = (std::uint32_t)((significand - 1.0f) * 8.0f + 0.5f); + if (mant > 7) { + mant = 0; + biased_exp++; + } + if (biased_exp > 3) + return (std::uint8_t)(sign | 0x1Fu); // overflow + return (std::uint8_t)(sign | (biased_exp << 3) | mant); +#endif +} + +/** + * @brief Convenience function to upcast an FP6 E3M2 value to single-precision. + * E3M2: 1 sign + 3 exponent (bias=3) + 2 mantissa, stored as 0b00SEEEMM, range +/-28. 
/**
 *  @brief Convenience function to upcast an FP6 E3M2 value to single-precision.
 *  E3M2: 1 sign + 3 exponent (bias=3) + 2 mantissa, stored as 0b00SEEEMM, range +/-28.
 */
inline float e3m2_to_f32(std::uint8_t u8) noexcept {
#if USEARCH_USE_NUMKONG
    nk_f32_t result;
    nk_e3m2_to_f32_serial((nk_e3m2_t const*)&u8, &result);
    return result;
#else
    // IEEE-754 binary32 bit patterns for all 64 codes; only the low 6 bits of `u8` are inspected.
    static std::uint32_t const lut[64] = {
        0x00000000, 0x3D800000, 0x3E000000, 0x3E400000, 0x3E800000, 0x3EA00000, 0x3EC00000, 0x3EE00000, // positive
        0x3F000000, 0x3F200000, 0x3F400000, 0x3F600000, 0x3F800000, 0x3FA00000, 0x3FC00000, 0x3FE00000, // positive
        0x40000000, 0x40200000, 0x40400000, 0x40600000, 0x40800000, 0x40A00000, 0x40C00000, 0x40E00000, // positive
        0x41000000, 0x41200000, 0x41400000, 0x41600000, 0x41800000, 0x41A00000, 0x41C00000, 0x41E00000, // positive
        0x80000000, 0xBD800000, 0xBE000000, 0xBE400000, 0xBE800000, 0xBEA00000, 0xBEC00000, 0xBEE00000, // negative
        0xBF000000, 0xBF200000, 0xBF400000, 0xBF600000, 0xBF800000, 0xBFA00000, 0xBFC00000, 0xBFE00000, // negative
        0xC0000000, 0xC0200000, 0xC0400000, 0xC0600000, 0xC0800000, 0xC0A00000, 0xC0C00000, 0xC0E00000, // negative
        0xC1000000, 0xC1200000, 0xC1400000, 0xC1600000, 0xC1800000, 0xC1A00000, 0xC1C00000, 0xC1E00000, // negative
    };
    // `memcpy` is the well-defined way to reinterpret the bit pattern as a float.
    float f32;
    std::memcpy(&f32, &lut[u8 & 0x3F], sizeof(f32));
    return f32;
#endif
}

/**
 *  @brief Convenience function to downcast a single-precision value to FP6 E3M2.
 *
 *  Rounds to the nearest representable value (ties away from zero), saturates
 *  out-of-range magnitudes to +/-28 and flushes magnitudes below half of the
 *  smallest subnormal (0.0625) to a signed zero. E3M2 has no NaN or infinity
 *  encodings, so both are saturated to the largest magnitude.
 *
 *  @param f32 Value to quantize.
 *  @return The 6-bit code in the low bits of the byte, laid out as 0b00SEEEMM.
 */
inline std::uint8_t f32_to_e3m2(float f32) noexcept {
#if USEARCH_USE_NUMKONG
    nk_e3m2_t result;
    nk_f32_to_e3m2_serial((nk_f32_t const*)&f32, &result);
    return result;
#else
    std::uint32_t bits;
    std::memcpy(&bits, &f32, sizeof(bits));
    std::uint32_t sign_bit = bits >> 31;
    std::uint32_t abs_bits = bits & 0x7FFFFFFFu;
    std::uint8_t sign = (std::uint8_t)(sign_bit << 5);
    if (abs_bits == 0)
        return sign; // preserves the sign of zero
    float abs_x = sign_bit ? -f32 : f32;

    // E3M2: bias=3, 3 exp bits, 2 mant bits, max normal = 28.0, min subnormal = 0.0625.
    // The negated comparison also catches NaN, which would otherwise reach a
    // float-to-integer cast with undefined behaviour.
    if (!(abs_x < 28.0f))
        return (std::uint8_t)(sign | 0x1Fu); // saturate to max
    if (abs_x < 0.03125f)
        return sign; // underflow to zero

    // Subnormal range: abs_x < 0.25 (min normal = 2^(1-3) = 0.25), value = mant / 16.
    if (abs_x < 0.25f) {
        // `steps` is in [1, 4]. A value of 4 means the input rounded up to 0.25, whose
        // code is exactly 0b000100 == 4, so the carry into the exponent field is the
        // correct encoding and must not be clamped back to 3 (0.1875).
        std::uint32_t steps = (std::uint32_t)(abs_x * 16.0f + 0.5f);
        return (std::uint8_t)(sign | steps);
    }

    // Normal range: value = (1 + mant/4) * 2^(exp-3).
    // frexp yields abs_x = frac * 2^exp_val with frac in [0.5, 1), hence the significand
    // in [1, 2) is frac * 2, the true exponent is exp_val - 1 and biased = true + 3.
    int exp_val;
    float frac = std::frexp(abs_x, &exp_val);
    float significand = frac * 2.0f;
    int biased_exp = exp_val - 1 + 3; // in [1, 7] because 0.25 <= abs_x < 28
    std::uint32_t mant = (std::uint32_t)((significand - 1.0f) * 4.0f + 0.5f);
    if (mant > 3) { // mantissa rounded up to the next power of two
        mant = 0;
        biased_exp++;
    }
    if (biased_exp > 7)
        return (std::uint8_t)(sign | 0x1Fu); // defensive saturation
    return (std::uint8_t)(sign | ((std::uint32_t)biased_exp << 2) | mant);
#endif
}
+ */ +class e2m3_bits_t { + std::uint8_t uint8_{}; + + public: + inline e2m3_bits_t() noexcept : uint8_(0) {} + inline e2m3_bits_t(e2m3_bits_t&&) = default; + inline e2m3_bits_t& operator=(e2m3_bits_t&&) = default; + inline e2m3_bits_t(e2m3_bits_t const&) = default; + inline e2m3_bits_t& operator=(e2m3_bits_t const&) = default; + + inline operator float() const noexcept { return e2m3_to_f32(uint8_); } + inline explicit operator bool() const noexcept { return e2m3_to_f32(uint8_) > 0.5f; } + + inline e2m3_bits_t(int v) noexcept : uint8_(f32_to_e2m3(static_cast(v))) {} + inline e2m3_bits_t(bool v) noexcept : uint8_(f32_to_e2m3(static_cast(v))) {} + inline e2m3_bits_t(float v) noexcept : uint8_(f32_to_e2m3(v)) {} + inline e2m3_bits_t(double v) noexcept : uint8_(f32_to_e2m3(static_cast(v))) {} + + inline bool operator<(e2m3_bits_t const& other) const noexcept { return float(*this) < float(other); } + + inline e2m3_bits_t operator+(e2m3_bits_t other) const noexcept { return {float(*this) + float(other)}; } + inline e2m3_bits_t operator-(e2m3_bits_t other) const noexcept { return {float(*this) - float(other)}; } + inline e2m3_bits_t operator*(e2m3_bits_t other) const noexcept { return {float(*this) * float(other)}; } + inline e2m3_bits_t operator/(e2m3_bits_t other) const noexcept { return {float(*this) / float(other)}; } + inline float operator+(float other) const noexcept { return float(*this) + other; } + inline float operator-(float other) const noexcept { return float(*this) - other; } + inline float operator*(float other) const noexcept { return float(*this) * other; } + inline float operator/(float other) const noexcept { return float(*this) / other; } + inline double operator+(double other) const noexcept { return float(*this) + other; } + inline double operator-(double other) const noexcept { return float(*this) - other; } + inline double operator*(double other) const noexcept { return float(*this) * other; } + inline double operator/(double other) const noexcept 
{ return float(*this) / other; } + + inline e2m3_bits_t& operator+=(float v) noexcept { + uint8_ = f32_to_e2m3(v + e2m3_to_f32(uint8_)); + return *this; + } + inline e2m3_bits_t& operator-=(float v) noexcept { + uint8_ = f32_to_e2m3(v - e2m3_to_f32(uint8_)); + return *this; + } + inline e2m3_bits_t& operator*=(float v) noexcept { + uint8_ = f32_to_e2m3(v * e2m3_to_f32(uint8_)); + return *this; + } + inline e2m3_bits_t& operator/=(float v) noexcept { + uint8_ = f32_to_e2m3(v / e2m3_to_f32(uint8_)); + return *this; + } + inline e2m3_bits_t& operator=(float v) noexcept { + uint8_ = f32_to_e2m3(v); + return *this; + } +}; + +/** + * @brief Numeric type for FP6 E3M2 floating point. + * 1 sign + 3 exponent + 2 mantissa bits, stored as 0b00SEEEMM, range +/-28. + */ +class e3m2_bits_t { + std::uint8_t uint8_{}; + + public: + inline e3m2_bits_t() noexcept : uint8_(0) {} + inline e3m2_bits_t(e3m2_bits_t&&) = default; + inline e3m2_bits_t& operator=(e3m2_bits_t&&) = default; + inline e3m2_bits_t(e3m2_bits_t const&) = default; + inline e3m2_bits_t& operator=(e3m2_bits_t const&) = default; + + inline operator float() const noexcept { return e3m2_to_f32(uint8_); } + inline explicit operator bool() const noexcept { return e3m2_to_f32(uint8_) > 0.5f; } + + inline e3m2_bits_t(int v) noexcept : uint8_(f32_to_e3m2(static_cast(v))) {} + inline e3m2_bits_t(bool v) noexcept : uint8_(f32_to_e3m2(static_cast(v))) {} + inline e3m2_bits_t(float v) noexcept : uint8_(f32_to_e3m2(v)) {} + inline e3m2_bits_t(double v) noexcept : uint8_(f32_to_e3m2(static_cast(v))) {} + + inline bool operator<(e3m2_bits_t const& other) const noexcept { return float(*this) < float(other); } + + inline e3m2_bits_t operator+(e3m2_bits_t other) const noexcept { return {float(*this) + float(other)}; } + inline e3m2_bits_t operator-(e3m2_bits_t other) const noexcept { return {float(*this) - float(other)}; } + inline e3m2_bits_t operator*(e3m2_bits_t other) const noexcept { return {float(*this) * float(other)}; } + 
inline e3m2_bits_t operator/(e3m2_bits_t other) const noexcept { return {float(*this) / float(other)}; } + inline float operator+(float other) const noexcept { return float(*this) + other; } + inline float operator-(float other) const noexcept { return float(*this) - other; } + inline float operator*(float other) const noexcept { return float(*this) * other; } + inline float operator/(float other) const noexcept { return float(*this) / other; } + inline double operator+(double other) const noexcept { return float(*this) + other; } + inline double operator-(double other) const noexcept { return float(*this) - other; } + inline double operator*(double other) const noexcept { return float(*this) * other; } + inline double operator/(double other) const noexcept { return float(*this) / other; } + + inline e3m2_bits_t& operator+=(float v) noexcept { + uint8_ = f32_to_e3m2(v + e3m2_to_f32(uint8_)); + return *this; + } + inline e3m2_bits_t& operator-=(float v) noexcept { + uint8_ = f32_to_e3m2(v - e3m2_to_f32(uint8_)); + return *this; + } + inline e3m2_bits_t& operator*=(float v) noexcept { + uint8_ = f32_to_e3m2(v * e3m2_to_f32(uint8_)); + return *this; + } + inline e3m2_bits_t& operator/=(float v) noexcept { + uint8_ = f32_to_e3m2(v / e3m2_to_f32(uint8_)); + return *this; + } + inline e3m2_bits_t& operator=(float v) noexcept { + uint8_ = f32_to_e3m2(v); + return *this; + } +}; + +#if USEARCH_USE_OPENMP +#pragma omp declare reduction(+ : unum::usearch::e5m2_bits_t : omp_out = omp_out + omp_in) \ + initializer(omp_priv = unum::usearch::e5m2_bits_t()) +#pragma omp declare reduction(+ : unum::usearch::e4m3_bits_t : omp_out = omp_out + omp_in) \ + initializer(omp_priv = unum::usearch::e4m3_bits_t()) +#pragma omp declare reduction(+ : unum::usearch::e2m3_bits_t : omp_out = omp_out + omp_in) \ + initializer(omp_priv = unum::usearch::e2m3_bits_t()) +#pragma omp declare reduction(+ : unum::usearch::e3m2_bits_t : omp_out = omp_out + omp_in) \ + initializer(omp_priv = 
unum::usearch::e3m2_bits_t()) +#endif + /** * @brief An STL-based executor or a "thread-pool" for parallel execution. * Isn't efficient for small batches, as it recreates the threads on every call. @@ -648,6 +1394,9 @@ class executor_stl_t { tasks_per_thread = (tasks / threads_count) + ((tasks % threads_count) != 0); for (std::size_t thread_idx = 1; thread_idx < threads_count; ++thread_idx) { new (&threads_pool[thread_idx - 1]) jthread_t([=]() { +#if USEARCH_USE_NUMKONG + nk_configure_thread_(nk_capabilities()); +#endif for (std::size_t task_idx = thread_idx * tasks_per_thread; task_idx < (std::min)(tasks, thread_idx * tasks_per_thread + tasks_per_thread); ++task_idx) thread_aware_function(thread_idx, task_idx); @@ -674,6 +1423,9 @@ class executor_stl_t { tasks_per_thread = (tasks / threads_count) + ((tasks % threads_count) != 0); for (std::size_t thread_idx = 1; thread_idx < threads_count; ++thread_idx) { new (&threads_pool[thread_idx - 1]) jthread_t([=, &stop]() { +#if USEARCH_USE_NUMKONG + nk_configure_thread_(nk_capabilities()); +#endif for (std::size_t task_idx = thread_idx * tasks_per_thread; task_idx < (std::min)(tasks, thread_idx * tasks_per_thread + tasks_per_thread) && !stop.load(std::memory_order_relaxed); @@ -700,7 +1452,12 @@ class executor_stl_t { return thread_aware_function(0); buffer_gt threads_pool(threads_count_ - 1); for (std::size_t thread_idx = 1; thread_idx < threads_count_; ++thread_idx) - new (&threads_pool[thread_idx - 1]) jthread_t([=]() { thread_aware_function(thread_idx); }); + new (&threads_pool[thread_idx - 1]) jthread_t([=]() { +#if USEARCH_USE_NUMKONG + nk_configure_thread_(nk_capabilities()); +#endif + thread_aware_function(thread_idx); + }); thread_aware_function(0); } }; @@ -709,7 +1466,7 @@ class executor_stl_t { /** * @brief An OpenMP-based executor or a "thread-pool" for parallel execution. - * Is the preferred implementation, when available, and maximum performance is needed. 
+ * Is the preferred implementation, when available, and target environment has OpenMP. */ class executor_openmp_t { public: @@ -718,6 +1475,14 @@ class executor_openmp_t { */ executor_openmp_t(std::size_t threads_count = 0) noexcept { omp_set_num_threads(static_cast(threads_count ? threads_count : std::thread::hardware_concurrency())); +#if USEARCH_USE_NUMKONG + nk_capability_t caps = nk_capabilities(); + nk_configure_thread(caps); +#pragma omp parallel + { + nk_configure_thread(caps); + } +#endif } /** @@ -854,7 +1619,8 @@ class page_allocator_t { #if defined(USEARCH_DEFINED_WINDOWS) return (byte_t*)(::VirtualAlloc(NULL, count_bytes, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE)); #else - return (byte_t*)mmap(NULL, count_bytes, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + auto* result = mmap(NULL, count_bytes, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + return (result == MAP_FAILED) ? nullptr : (byte_t*)result; #endif } @@ -889,6 +1655,7 @@ template class memory_mapping_allocator_gt { std::size_t last_usage_ = head_size(); std::size_t last_capacity_ = min_capacity(); std::size_t wasted_space_ = 0; + std::size_t total_allocated_ = 0; public: using value_type = byte_t; @@ -899,13 +1666,15 @@ template class memory_mapping_allocator_gt { memory_mapping_allocator_gt() = default; memory_mapping_allocator_gt(memory_mapping_allocator_gt&& other) noexcept : last_arena_(exchange(other.last_arena_, nullptr)), last_usage_(exchange(other.last_usage_, 0)), - last_capacity_(exchange(other.last_capacity_, 0)), wasted_space_(exchange(other.wasted_space_, 0)) {} + last_capacity_(exchange(other.last_capacity_, 0)), wasted_space_(exchange(other.wasted_space_, 0)), + total_allocated_(exchange(other.total_allocated_, 0)) {} memory_mapping_allocator_gt& operator=(memory_mapping_allocator_gt&& other) noexcept { std::swap(last_arena_, other.last_arena_); std::swap(last_usage_, other.last_usage_); std::swap(last_capacity_, other.last_capacity_); 
std::swap(wasted_space_, other.wasted_space_); + std::swap(total_allocated_, other.total_allocated_); return *this; } @@ -930,6 +1699,7 @@ template class memory_mapping_allocator_gt { last_usage_ = head_size(); last_capacity_ = min_capacity(); wasted_space_ = 0; + total_allocated_ = 0; } /** @@ -959,7 +1729,7 @@ template class memory_mapping_allocator_gt { if (!last_arena_ || (last_usage_ + extended_bytes >= last_capacity_)) { std::size_t new_cap = (std::max)(last_capacity_, ceil2(extended_bytes)) * capacity_multiplier(); byte_t* new_arena = page_allocator_t{}.allocate(new_cap); - if (!new_arena || new_arena == (byte_t*)MAP_FAILED) + if (!new_arena) return nullptr; std::memcpy(new_arena, &last_arena_, sizeof(byte_t*)); std::memcpy(new_arena + sizeof(byte_t*), &new_cap, sizeof(std::size_t)); @@ -968,6 +1738,7 @@ template class memory_mapping_allocator_gt { last_arena_ = new_arena; last_capacity_ = new_cap; last_usage_ = head_size(); + total_allocated_ += new_cap; } wasted_space_ += extended_bytes - count_bytes; @@ -978,17 +1749,7 @@ template class memory_mapping_allocator_gt { * @brief Returns the amount of memory used by the allocator across all arenas. * @return The amount of space in bytes. */ - std::size_t total_allocated() const noexcept { - if (!last_arena_) - return 0; - std::size_t total_used = 0; - std::size_t last_capacity = last_capacity_; - do { - total_used += last_capacity; - last_capacity /= capacity_multiplier(); - } while (last_capacity >= min_capacity()); - return total_used; - } + std::size_t total_allocated() const noexcept { return total_allocated_; } /** * @brief Returns the amount of wasted space due to alignment. 
@@ -1132,6 +1893,10 @@ template <> struct cast_gt { static bool try_(byte_t const*, std::size_t, byte_t*) noexcept { return false; } }; +template <> struct cast_gt { + static bool try_(byte_t const*, std::size_t, byte_t*) noexcept { return false; } +}; + template <> struct cast_gt { static bool try_(byte_t const*, std::size_t, byte_t*) noexcept { return false; } }; @@ -1200,6 +1965,31 @@ template struct cast_from_i8_gt { } }; +template struct cast_to_u8_gt { + inline static bool try_(byte_t const* input, std::size_t dim, byte_t* output) noexcept { + from_scalar_at const* typed_input = reinterpret_cast(input); + std::uint8_t* typed_output = reinterpret_cast(output); + double magnitude = 0.0; + for (std::size_t i = 0; i != dim; ++i) + magnitude += (double)typed_input[i] * (double)typed_input[i]; + magnitude = std::sqrt(magnitude); + for (std::size_t i = 0; i != dim; ++i) + typed_output[i] = + static_cast(usearch::clamp(typed_input[i] * 255.0 / magnitude, 0.0, 255.0)); + return true; + } +}; + +template struct cast_from_u8_gt { + static bool try_(byte_t const* input, std::size_t dim, byte_t* output) noexcept { + std::uint8_t const* typed_input = reinterpret_cast(input); + to_scalar_at* typed_output = reinterpret_cast(output); + for (std::size_t i = 0; i != dim; ++i) + typed_output[i] = static_cast(typed_input[i]) / 255.f; + return true; + } +}; + template <> struct cast_gt : public cast_from_i8_gt {}; template <> struct cast_gt : public cast_from_i8_gt {}; template <> struct cast_gt : public cast_from_i8_gt {}; @@ -1223,6 +2013,101 @@ template <> struct cast_gt : public cast_to_b1x8_gt {}; template <> struct cast_gt : public cast_from_b1x8_gt {}; template <> struct cast_gt : public cast_to_b1x8_gt {}; +template struct cast_through_f32_gt { + static bool try_(byte_t const* input, std::size_t dim, byte_t* output) noexcept { + from_at const* in = reinterpret_cast(input); + to_at* out = reinterpret_cast(output); + for (std::size_t i = 0; i != dim; ++i) + out[i] = 
to_at(float(in[i])); + return true; + } +}; + +template <> struct cast_gt { + static bool try_(byte_t const*, std::size_t, byte_t*) noexcept { return false; } +}; +template <> struct cast_gt : public cast_through_f32_gt {}; +template <> struct cast_gt : public cast_through_f32_gt {}; +template <> struct cast_gt : public cast_through_f32_gt {}; +template <> struct cast_gt : public cast_through_f32_gt {}; +template <> struct cast_gt : public cast_through_f32_gt {}; +template <> struct cast_gt : public cast_through_f32_gt {}; +template <> struct cast_gt : public cast_through_f32_gt {}; +template <> struct cast_gt : public cast_through_f32_gt {}; +template <> struct cast_gt : public cast_to_i8_gt {}; +template <> struct cast_gt : public cast_from_i8_gt {}; +template <> struct cast_gt : public cast_to_b1x8_gt {}; +template <> struct cast_gt : public cast_from_b1x8_gt {}; + +template <> struct cast_gt { + static bool try_(byte_t const*, std::size_t, byte_t*) noexcept { return false; } +}; +template <> struct cast_gt : public cast_through_f32_gt {}; +template <> struct cast_gt : public cast_through_f32_gt {}; +template <> struct cast_gt : public cast_through_f32_gt {}; +template <> struct cast_gt : public cast_through_f32_gt {}; +template <> struct cast_gt : public cast_through_f32_gt {}; +template <> struct cast_gt : public cast_through_f32_gt {}; +template <> struct cast_gt : public cast_through_f32_gt {}; +template <> struct cast_gt : public cast_through_f32_gt {}; +template <> struct cast_gt : public cast_to_i8_gt {}; +template <> struct cast_gt : public cast_from_i8_gt {}; +template <> struct cast_gt : public cast_to_b1x8_gt {}; +template <> struct cast_gt : public cast_from_b1x8_gt {}; + +template <> struct cast_gt { + static bool try_(byte_t const*, std::size_t, byte_t*) noexcept { return false; } +}; +template <> struct cast_gt : public cast_through_f32_gt {}; +template <> struct cast_gt : public cast_through_f32_gt {}; +template <> struct cast_gt : public 
cast_through_f32_gt {}; +template <> struct cast_gt : public cast_through_f32_gt {}; +template <> struct cast_gt : public cast_through_f32_gt {}; +template <> struct cast_gt : public cast_through_f32_gt {}; +template <> struct cast_gt : public cast_through_f32_gt {}; +template <> struct cast_gt : public cast_through_f32_gt {}; +template <> struct cast_gt : public cast_to_i8_gt {}; +template <> struct cast_gt : public cast_from_i8_gt {}; +template <> struct cast_gt : public cast_to_b1x8_gt {}; +template <> struct cast_gt : public cast_from_b1x8_gt {}; + +template <> struct cast_gt { + static bool try_(byte_t const*, std::size_t, byte_t*) noexcept { return false; } +}; +template <> struct cast_gt : public cast_through_f32_gt {}; +template <> struct cast_gt : public cast_through_f32_gt {}; +template <> struct cast_gt : public cast_through_f32_gt {}; +template <> struct cast_gt : public cast_through_f32_gt {}; +template <> struct cast_gt : public cast_through_f32_gt {}; +template <> struct cast_gt : public cast_through_f32_gt {}; +template <> struct cast_gt : public cast_through_f32_gt {}; +template <> struct cast_gt : public cast_through_f32_gt {}; +template <> struct cast_gt : public cast_to_i8_gt {}; +template <> struct cast_gt : public cast_from_i8_gt {}; +template <> struct cast_gt : public cast_to_b1x8_gt {}; +template <> struct cast_gt : public cast_from_b1x8_gt {}; + +template <> struct cast_gt : public cast_from_u8_gt {}; +template <> struct cast_gt : public cast_from_u8_gt {}; +template <> struct cast_gt : public cast_from_u8_gt {}; +template <> struct cast_gt : public cast_from_u8_gt {}; +template <> struct cast_gt : public cast_to_u8_gt {}; +template <> struct cast_gt : public cast_to_u8_gt {}; +template <> struct cast_gt : public cast_to_u8_gt {}; +template <> struct cast_gt : public cast_to_u8_gt {}; +template <> struct cast_gt : public cast_from_b1x8_gt {}; +template <> struct cast_gt : public cast_to_b1x8_gt {}; +template <> struct cast_gt : public 
cast_to_u8_gt {}; +template <> struct cast_gt : public cast_from_u8_gt {}; +template <> struct cast_gt : public cast_to_u8_gt {}; +template <> struct cast_gt : public cast_from_u8_gt {}; +template <> struct cast_gt : public cast_to_u8_gt {}; +template <> struct cast_gt : public cast_from_u8_gt {}; +template <> struct cast_gt : public cast_to_u8_gt {}; +template <> struct cast_gt : public cast_from_u8_gt {}; +template <> struct cast_gt : public cast_to_u8_gt {}; +template <> struct cast_gt : public cast_from_u8_gt {}; + /** * @brief Type-punned array casting function. * Arguments: input buffer, bytes in input buffer, output buffer. @@ -1237,20 +2122,30 @@ using cast_punned_t = bool (*)(byte_t const*, std::size_t, byte_t*); */ struct casts_punned_t { struct group_t { - cast_punned_t b1x8{}; - cast_punned_t i8{}; - cast_punned_t f16{}; - cast_punned_t bf16{}; - cast_punned_t f32{}; cast_punned_t f64{}; + cast_punned_t f32{}; + cast_punned_t bf16{}; + cast_punned_t f16{}; + cast_punned_t e5m2{}; + cast_punned_t e4m3{}; + cast_punned_t e3m2{}; + cast_punned_t e2m3{}; + cast_punned_t i8{}; + cast_punned_t u8{}; + cast_punned_t b1x8{}; cast_punned_t operator[](scalar_kind_t scalar_kind) const noexcept { switch (scalar_kind) { case scalar_kind_t::f64_k: return f64; case scalar_kind_t::f32_k: return f32; - case scalar_kind_t::f16_k: return f16; case scalar_kind_t::bf16_k: return bf16; + case scalar_kind_t::f16_k: return f16; + case scalar_kind_t::e5m2_k: return e5m2; + case scalar_kind_t::e4m3_k: return e4m3; + case scalar_kind_t::e3m2_k: return e3m2; + case scalar_kind_t::e2m3_k: return e2m3; case scalar_kind_t::i8_k: return i8; + case scalar_kind_t::u8_k: return u8; case scalar_kind_t::b1x8_k: return b1x8; default: return nullptr; } @@ -1261,19 +2156,29 @@ struct casts_punned_t { template static casts_punned_t make() noexcept { casts_punned_t result; - result.from.b1x8 = &cast_gt::try_; - result.from.i8 = &cast_gt::try_; - result.from.f16 = &cast_gt::try_; - 
result.from.bf16 = &cast_gt::try_; - result.from.f32 = &cast_gt::try_; result.from.f64 = &cast_gt::try_; + result.from.f32 = &cast_gt::try_; + result.from.bf16 = &cast_gt::try_; + result.from.f16 = &cast_gt::try_; + result.from.e5m2 = &cast_gt::try_; + result.from.e4m3 = &cast_gt::try_; + result.from.e3m2 = &cast_gt::try_; + result.from.e2m3 = &cast_gt::try_; + result.from.i8 = &cast_gt::try_; + result.from.u8 = &cast_gt::try_; + result.from.b1x8 = &cast_gt::try_; - result.to.b1x8 = &cast_gt::try_; - result.to.i8 = &cast_gt::try_; - result.to.f16 = &cast_gt::try_; - result.to.bf16 = &cast_gt::try_; - result.to.f32 = &cast_gt::try_; result.to.f64 = &cast_gt::try_; + result.to.f32 = &cast_gt::try_; + result.to.bf16 = &cast_gt::try_; + result.to.f16 = &cast_gt::try_; + result.to.e5m2 = &cast_gt::try_; + result.to.e4m3 = &cast_gt::try_; + result.to.e3m2 = &cast_gt::try_; + result.to.e2m3 = &cast_gt::try_; + result.to.i8 = &cast_gt::try_; + result.to.u8 = &cast_gt::try_; + result.to.b1x8 = &cast_gt::try_; return result; } @@ -1282,9 +2187,14 @@ struct casts_punned_t { switch (scalar_kind) { case scalar_kind_t::f64_k: return casts_punned_t::make(); case scalar_kind_t::f32_k: return casts_punned_t::make(); - case scalar_kind_t::f16_k: return casts_punned_t::make(); case scalar_kind_t::bf16_k: return casts_punned_t::make(); + case scalar_kind_t::f16_k: return casts_punned_t::make(); + case scalar_kind_t::e5m2_k: return casts_punned_t::make(); + case scalar_kind_t::e4m3_k: return casts_punned_t::make(); + case scalar_kind_t::e3m2_k: return casts_punned_t::make(); + case scalar_kind_t::e2m3_k: return casts_punned_t::make(); case scalar_kind_t::i8_k: return casts_punned_t::make(); + case scalar_kind_t::u8_k: return casts_punned_t::make(); case scalar_kind_t::b1x8_k: return casts_punned_t::make(); default: return {}; } @@ -1630,6 +2540,57 @@ struct metric_l2sq_i8_t { } }; +/** + * @brief Cosine (Angular) distance for unsigned 8-bit integers using 32-bit intermediates. 
+ */ +struct metric_cos_u8_t { + using scalar_t = u8_t; + using result_t = f32_t; + + inline result_t operator()(u8_t const* a, u8_t const* b, std::size_t dim) const noexcept { + std::int64_t ab{}, a2{}, b2{}; +#if USEARCH_USE_OPENMP +#pragma omp simd reduction(+ : ab, a2, b2) +#elif defined(USEARCH_DEFINED_CLANG) +#pragma clang loop vectorize(enable) +#elif defined(USEARCH_DEFINED_GCC) +#pragma GCC ivdep +#endif + for (std::size_t i = 0; i != dim; i++) { + std::int32_t ai{a[i]}; + std::int32_t bi{b[i]}; + ab += ai * bi; + a2 += square(ai); + b2 += square(bi); + } + result_t a2f = std::sqrt(static_cast(a2)); + result_t b2f = std::sqrt(static_cast(b2)); + return (ab != 0) ? (1.f - ab / (a2f * b2f)) : 0; + } +}; + +/** + * @brief Squared Euclidean (L2) distance for unsigned 8-bit integers using 32-bit intermediates. + */ +struct metric_l2sq_u8_t { + using scalar_t = u8_t; + using result_t = f32_t; + + inline result_t operator()(u8_t const* a, u8_t const* b, std::size_t dim) const noexcept { + std::int32_t ab_deltas_sq{}; +#if USEARCH_USE_OPENMP +#pragma omp simd reduction(+ : ab_deltas_sq) +#elif defined(USEARCH_DEFINED_CLANG) +#pragma clang loop vectorize(enable) +#elif defined(USEARCH_DEFINED_GCC) +#pragma GCC ivdep +#endif + for (std::size_t i = 0; i != dim; i++) + ab_deltas_sq += square(std::int32_t(a[i]) - std::int32_t(b[i])); + return static_cast(ab_deltas_sq); + } +}; + /** * @brief Haversine distance for the shortest distance between two nodes on * the surface of a 3D sphere, defined with latitude and longitude. @@ -1671,10 +2632,193 @@ enum class metric_punned_signature_t { array_array_state_k, }; +#if USEARCH_USE_NUMKONG + +/** + * @brief Returns the result of `nk_capabilities()`, cached in a function-local static. + * Every call-site that needs the current CPU's capability mask should use this + * instead of caching the value in its own `static` local. 
+ */ +inline nk_capability_t nk_cached_capabilities() noexcept { + static nk_capability_t caps = [] { + nk_capability_t c = nk_capabilities(); + nk_configure_thread(c); + return c; + }(); + return caps; +} + +/** + * @brief Converts a USearch `scalar_kind_t` to the corresponding NumKong `nk_dtype_t`. + * @return The matching dtype, or `(nk_dtype_t)0` when there is no NumKong equivalent. + */ +inline nk_dtype_t scalar_kind_to_nk_dtype(scalar_kind_t sk) noexcept { + switch (sk) { + case scalar_kind_t::f64_k: return (nk_dtype_t)nk_f64_k; + case scalar_kind_t::f32_k: return (nk_dtype_t)nk_f32_k; + case scalar_kind_t::bf16_k: return (nk_dtype_t)nk_bf16_k; + case scalar_kind_t::f16_k: return (nk_dtype_t)nk_f16_k; + case scalar_kind_t::e5m2_k: return (nk_dtype_t)nk_e5m2_k; + case scalar_kind_t::e4m3_k: return (nk_dtype_t)nk_e4m3_k; + case scalar_kind_t::e3m2_k: return (nk_dtype_t)nk_e3m2_k; + case scalar_kind_t::e2m3_k: return (nk_dtype_t)nk_e2m3_k; + case scalar_kind_t::i8_k: return (nk_dtype_t)nk_i8_k; + case scalar_kind_t::u8_k: return (nk_dtype_t)nk_u8_k; + case scalar_kind_t::b1x8_k: return (nk_dtype_t)nk_u1_k; + default: return (nk_dtype_t)0; + } +} + +/** + * @brief One entry in the ISA capability-to-name mapping table. + */ +struct isa_target_t { + nk_capability_t cap; + char const* name; +}; + +/** + * @brief Returns the static table mapping each NumKong capability bit to its human-readable name. + * Both `hardware_acceleration_available` and `hardware_acceleration_compiled` use this table + * together with the bitmasks from `nk_capabilities_available()` / `nk_capabilities_compiled()`. 
+ */ +inline span_gt isa_targets() noexcept { + static isa_target_t const table[] = { + {nk_cap_serial_k, "serial"}, + // x86 + {nk_cap_haswell_k, "haswell"}, + {nk_cap_skylake_k, "skylake"}, + {nk_cap_icelake_k, "icelake"}, + {nk_cap_genoa_k, "genoa"}, + {nk_cap_sapphire_k, "sapphire"}, + {nk_cap_sapphireamx_k, "sapphireamx"}, + {nk_cap_graniteamx_k, "graniteamx"}, + {nk_cap_turin_k, "turin"}, + {nk_cap_sierra_k, "sierra"}, + {nk_cap_alder_k, "alder"}, + {nk_cap_diamond_k, "diamond"}, + // ARM NEON + {nk_cap_neon_k, "neon"}, + {nk_cap_neonhalf_k, "neonhalf"}, + {nk_cap_neonsdot_k, "neonsdot"}, + {nk_cap_neonbfdot_k, "neonbfdot"}, + {nk_cap_neonfhm_k, "neonfhm"}, + {nk_cap_neonfp8_k, "neonfp8"}, + // ARM SVE + {nk_cap_sve_k, "sve"}, + {nk_cap_svehalf_k, "svehalf"}, + {nk_cap_svesdot_k, "svesdot"}, + {nk_cap_svebfdot_k, "svebfdot"}, + {nk_cap_sve2_k, "sve2"}, + {nk_cap_sve2p1_k, "sve2p1"}, + // ARM SME + {nk_cap_sme_k, "sme"}, + {nk_cap_sme2_k, "sme2"}, + {nk_cap_sme2p1_k, "sme2p1"}, + {nk_cap_smef64_k, "smef64"}, + {nk_cap_smefa64_k, "smefa64"}, + {nk_cap_smehalf_k, "smehalf"}, + {nk_cap_smebf16_k, "smebf16"}, + {nk_cap_smelut2_k, "smelut2"}, + {nk_cap_smebi32_k, "smebi32"}, + // RISC-V + {nk_cap_rvv_k, "rvv"}, + {nk_cap_rvvhalf_k, "rvvhalf"}, + {nk_cap_rvvbf16_k, "rvvbf16"}, + {nk_cap_rvvbb_k, "rvvbb"}, + // WebAssembly + {nk_cap_v128relaxed_k, "v128relaxed"}, + // LoongArch + {nk_cap_loongsonasx_k, "loongsonasx"}, + // IBM Power + {nk_cap_powervsx_k, "powervsx"}, + }; + return {table, sizeof(table) / sizeof(table[0])}; +} + +/** + * @brief Returns the human-readable name for a single capability bit. + * @param cap A single `nk_capability_t` bit (not a bitmask of multiple capabilities). + * @return The name string, or "unknown" if the bit is not in the table. 
+ */ +inline char const* capability_name(nk_capability_t cap) noexcept { + span_gt table = isa_targets(); + for (std::size_t i = 0; i != table.size(); ++i) + if (table[i].cap == cap) + return table[i].name; + return "unknown"; +} + +/** + * @brief Formats all ISA targets whose bits are set in `caps` into a comma-separated string. + * @param caps Capability bitmask to filter against. + * @param buf Output buffer. + * @param buf_len Size of the output buffer in bytes. + * @return The number of characters written (excluding the null terminator). + */ +inline std::size_t isa_targets_format(nk_capability_t caps, char* buf, std::size_t buf_len) noexcept { + if (!buf_len) + return 0; + span_gt table = isa_targets(); + char* p = buf; + char* end = buf + buf_len - 1; + std::size_t matched = 0; + for (std::size_t i = 0; i != table.size(); ++i) { + if (!(caps & table[i].cap)) + continue; + std::size_t name_len = std::strlen(table[i].name); + std::size_t needed = name_len + (matched ? 2 : 0); + if (p + needed > end) + break; + if (matched++) + *p++ = ',', *p++ = ' '; + std::memcpy(p, table[i].name, name_len); + p += name_len; + } + *p = '\0'; + return static_cast(p - buf); +} + +/** + * @brief Returns a comma-separated list of ISAs available at runtime (compiled AND supported by CPU). + * @return Pointer to a static null-terminated string. Thread-safe after first call. + */ +inline char const* hardware_acceleration_available() noexcept { + static char buf[1024]; + static bool initialized = false; + if (!initialized) { + nk_cached_capabilities(); // ensures nk_configure_thread is called + isa_targets_format(nk_capabilities_available(), buf, sizeof(buf)); + initialized = true; + } + return buf; +} + +/** + * @brief Returns a comma-separated list of ISAs that were compiled into this binary. + * @return Pointer to a static null-terminated string. Thread-safe after first call. 
+ */ +inline char const* hardware_acceleration_compiled() noexcept { + static char buf[1024]; + static bool initialized = false; + if (!initialized) { + isa_targets_format(nk_capabilities_compiled(), buf, sizeof(buf)); + initialized = true; + } + return buf; +} + +#else + +inline char const* hardware_acceleration_available() noexcept { return "serial"; } +inline char const* hardware_acceleration_compiled() noexcept { return "serial"; } + +#endif + /** * @brief Type-punned metric class, which unlike STL's `std::function` avoids any memory allocations. * It also provides additional APIs to check, if SIMD hardware-acceleration is available. - * Wraps the `simsimd_metric_dense_punned_t` when available. The auto-vectorized backend otherwise. + * Wraps the `nk_metric_dense_punned_t` when available. The auto-vectorized backend otherwise. */ class metric_punned_t { public: @@ -1701,8 +2845,8 @@ class metric_punned_t { metric_kind_t metric_kind_ = metric_kind_t::unknown_k; scalar_kind_t scalar_kind_ = scalar_kind_t::unknown_k; -#if USEARCH_USE_SIMSIMD - simsimd_capability_t isa_kind_ = simsimd_cap_serial_k; +#if USEARCH_USE_NUMKONG + nk_capability_t isa_kind_ = nk_cap_serial_k; #endif public: @@ -1729,7 +2873,7 @@ class metric_punned_t { /** * @brief Creates a metric of a natively supported kind, choosing the best - * available backend internally or from SimSIMD. + * available backend internally or from NumKong. * * @param dimensions The number of elements in the input arrays. * @param metric_kind The kind of metric to use. 
@@ -1747,12 +2891,8 @@ class metric_punned_t { metric.metric_kind_ = metric_kind; metric.scalar_kind_ = scalar_kind; -#if USEARCH_USE_SIMSIMD - if (!metric.configure_with_simsimd()) + if (!metric.configure_with_numkong()) metric.configure_with_autovec(); -#else - metric.configure_with_autovec(); -#endif return metric; } @@ -1828,27 +2968,11 @@ class metric_punned_t { inline char const* isa_name() const noexcept { if (!*this) return "uninitialized"; - -#if USEARCH_USE_SIMSIMD - switch (isa_kind_) { - case simsimd_cap_serial_k: return "serial"; - case simsimd_cap_neon_k: return "neon"; - case simsimd_cap_neon_i8_k: return "neon_i8"; - case simsimd_cap_neon_f16_k: return "neon_f16"; - case simsimd_cap_neon_bf16_k: return "neon_bf16"; - case simsimd_cap_sve_k: return "sve"; - case simsimd_cap_sve_i8_k: return "sve_i8"; - case simsimd_cap_sve_f16_k: return "sve_f16"; - case simsimd_cap_sve_bf16_k: return "sve_bf16"; - case simsimd_cap_haswell_k: return "haswell"; - case simsimd_cap_skylake_k: return "skylake"; - case simsimd_cap_ice_k: return "ice"; - case simsimd_cap_genoa_k: return "genoa"; - case simsimd_cap_sapphire_k: return "sapphire"; - default: return "unknown"; - } -#endif +#if USEARCH_USE_NUMKONG + return capability_name(isa_kind_); +#else return "serial"; +#endif } inline std::size_t bytes_per_vector() const noexcept { @@ -1860,63 +2984,83 @@ class metric_punned_t { } private: -#if USEARCH_USE_SIMSIMD - bool configure_with_simsimd(simsimd_capability_t simd_caps) noexcept { - simsimd_metric_kind_t kind = simsimd_metric_unknown_k; - simsimd_datatype_t datatype = simsimd_datatype_unknown_k; - simsimd_capability_t allowed = simsimd_cap_any_k; +#if USEARCH_USE_NUMKONG + /** + * @brief Typed invoke template for NumKong kernels. + * + * The accumulator type and IP-reversal flag are selected once in `configure_with_numkong` + * and baked into the member-function pointer stored in `metric_routed_`. 
+ */ + template +#if defined(USEARCH_DEFINED_CLANG) || defined(USEARCH_DEFINED_GCC) + __attribute__((no_sanitize("all"))) +#endif + result_t invoke_numkong(uptr_t a, uptr_t b) const noexcept { + accumulator_at result = 0; + auto function_pointer = (nk_metric_dense_punned_t)(metric_ptr_); + function_pointer(reinterpret_cast(a), reinterpret_cast(b), metric_third_arg_, + &result); + return reverse_ak ? (result_t)(1 - (result_t)result) : (result_t)result; + } + + /// Shorthand: casts an `invoke_numkong` instantiation to the opaque `metric_routed_t` type. + template // + static metric_routed_t numkong_routed() noexcept { + return reinterpret_cast(&metric_punned_t::invoke_numkong); + } + + bool configure_with_numkong(nk_capability_t simd_caps) noexcept { + nk_kernel_kind_t kind = (nk_kernel_kind_t)0; switch (metric_kind_) { - case metric_kind_t::ip_k: kind = simsimd_metric_dot_k; break; - case metric_kind_t::cos_k: kind = simsimd_metric_cos_k; break; - case metric_kind_t::l2sq_k: kind = simsimd_metric_l2sq_k; break; - case metric_kind_t::hamming_k: kind = simsimd_metric_hamming_k; break; - case metric_kind_t::tanimoto_k: kind = simsimd_metric_jaccard_k; break; - case metric_kind_t::jaccard_k: kind = simsimd_metric_jaccard_k; break; - default: break; - } - switch (scalar_kind_) { - case scalar_kind_t::f32_k: datatype = simsimd_datatype_f32_k; break; - case scalar_kind_t::f64_k: datatype = simsimd_datatype_f64_k; break; - case scalar_kind_t::f16_k: datatype = simsimd_datatype_f16_k; break; - case scalar_kind_t::bf16_k: datatype = simsimd_datatype_bf16_k; break; - case scalar_kind_t::i8_k: datatype = simsimd_datatype_i8_k; break; - case scalar_kind_t::b1x8_k: datatype = simsimd_datatype_b8_k; break; - default: break; + case metric_kind_t::ip_k: kind = nk_kernel_dot_k; break; + case metric_kind_t::cos_k: kind = nk_kernel_angular_k; break; + case metric_kind_t::l2sq_k: kind = nk_kernel_sqeuclidean_k; break; + case metric_kind_t::hamming_k: kind = nk_kernel_hamming_k; break; + 
case metric_kind_t::tanimoto_k: kind = nk_kernel_jaccard_k; break; + case metric_kind_t::jaccard_k: kind = nk_kernel_jaccard_k; break; + default: return false; } - simsimd_metric_dense_punned_t simd_metric = NULL; - simsimd_capability_t simd_kind = simsimd_cap_any_k; - simsimd_find_kernel_punned(kind, datatype, simd_caps, allowed, (simsimd_kernel_punned_t*)&simd_metric, - &simd_kind); + nk_dtype_t datatype = scalar_kind_to_nk_dtype(scalar_kind_); + nk_metric_dense_punned_t simd_metric = NULL; + nk_capability_t simd_kind = nk_cap_any_k; + nk_find_kernel_punned(kind, datatype, simd_caps, (nk_kernel_punned_t*)&simd_metric, &simd_kind); if (simd_metric == nullptr) return false; std::memcpy(&metric_ptr_, &simd_metric, sizeof(simd_metric)); - metric_routed_ = metric_kind_ == metric_kind_t::ip_k - ? reinterpret_cast(&metric_punned_t::invoke_simsimd_reverse) - : reinterpret_cast(&metric_punned_t::invoke_simsimd); + + // Select the typed invoke variant based on the kernel's output dtype to avoid per-call branching. + // The output type depends on both scalar_kind and metric_kind (e.g. i8 dot->i32, i8 l2sq->u32, i8 cos->f32). + nk_dtype_t out_dtype = nk_kernel_output_dtype(kind, datatype); + bool is_ip = (metric_kind_ == metric_kind_t::ip_k); + switch (out_dtype) { + case nk_f64_k: + metric_routed_ = is_ip ? numkong_routed() : numkong_routed(); + break; + case nk_f32_k: + metric_routed_ = is_ip ? numkong_routed() : numkong_routed(); + break; + case nk_i32_k: + metric_routed_ = is_ip ? numkong_routed() : numkong_routed(); + break; + case nk_u32_k: + metric_routed_ = is_ip ? numkong_routed() : numkong_routed(); + break; + default: metric_routed_ = is_ip ? numkong_routed() : numkong_routed(); break; + } isa_kind_ = simd_kind; + + // NumKong binary-set kernels (Hamming, Jaccard/Tanimoto) expect the third argument + // to be the number of dimensions (bits), not the number of bytes. 
+ if (scalar_kind_ == scalar_kind_t::b1x8_k) + metric_third_arg_ = dimensions_; + return true; } - bool configure_with_simsimd() noexcept { - static simsimd_capability_t static_capabilities = simsimd_capabilities(); - return configure_with_simsimd(static_capabilities); - } -#if defined(USEARCH_DEFINED_CLANG) || defined(USEARCH_DEFINED_GCC) - __attribute__((no_sanitize("all"))) -#endif - result_t - invoke_simsimd(uptr_t a, uptr_t b) const noexcept { - simsimd_distance_t result; - // Here `reinterpret_cast` raises warning and UBSan reports an issue... we know what we are doing! - auto function_pointer = (simsimd_metric_dense_punned_t)(metric_ptr_); - function_pointer(reinterpret_cast(a), reinterpret_cast(b), metric_third_arg_, - &result); - return (result_t)result; - } - result_t invoke_simsimd_reverse(uptr_t a, uptr_t b) const noexcept { return 1 - invoke_simsimd(a, b); } + bool configure_with_numkong() noexcept { return configure_with_numkong(nk_cached_capabilities()); } #else - bool configure_with_simsimd() noexcept { return false; } + bool configure_with_numkong() noexcept { return false; } #endif result_t invoke_array_array_third(uptr_t a, uptr_t b) const noexcept { auto function_pointer = (metric_array_array_size_t)(metric_ptr_); @@ -1932,70 +3076,108 @@ class metric_punned_t { switch (metric_kind_) { case metric_kind_t::ip_k: { switch (scalar_kind_) { + case scalar_kind_t::f64_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; + case scalar_kind_t::f32_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; case scalar_kind_t::bf16_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; - case scalar_kind_t::i8_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; case scalar_kind_t::f16_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; - case scalar_kind_t::f32_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; - case scalar_kind_t::f64_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; + case scalar_kind_t::e5m2_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; 
+ case scalar_kind_t::e4m3_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; + case scalar_kind_t::e3m2_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; + case scalar_kind_t::e2m3_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; + case scalar_kind_t::i8_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; + case scalar_kind_t::u8_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; default: metric_ptr_ = 0; break; } break; } case metric_kind_t::cos_k: { switch (scalar_kind_) { + case scalar_kind_t::f64_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; + case scalar_kind_t::f32_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; case scalar_kind_t::bf16_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; - case scalar_kind_t::i8_k: metric_ptr_ = (uptr_t)&equidimensional_; break; case scalar_kind_t::f16_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; - case scalar_kind_t::f32_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; - case scalar_kind_t::f64_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; + case scalar_kind_t::e5m2_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; + case scalar_kind_t::e4m3_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; + case scalar_kind_t::e3m2_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; + case scalar_kind_t::e2m3_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; + case scalar_kind_t::i8_k: metric_ptr_ = (uptr_t)&equidimensional_; break; + case scalar_kind_t::u8_k: metric_ptr_ = (uptr_t)&equidimensional_; break; default: metric_ptr_ = 0; break; } break; } case metric_kind_t::l2sq_k: { switch (scalar_kind_) { + case scalar_kind_t::f64_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; + case scalar_kind_t::f32_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; case scalar_kind_t::bf16_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; - case scalar_kind_t::i8_k: metric_ptr_ = (uptr_t)&equidimensional_; break; case scalar_kind_t::f16_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; - case 
scalar_kind_t::f32_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; - case scalar_kind_t::f64_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; + case scalar_kind_t::e5m2_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; + case scalar_kind_t::e4m3_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; + case scalar_kind_t::e3m2_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; + case scalar_kind_t::e2m3_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; + case scalar_kind_t::i8_k: metric_ptr_ = (uptr_t)&equidimensional_; break; + case scalar_kind_t::u8_k: metric_ptr_ = (uptr_t)&equidimensional_; break; default: metric_ptr_ = 0; break; } break; } case metric_kind_t::pearson_k: { switch (scalar_kind_) { + case scalar_kind_t::f64_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; + case scalar_kind_t::f32_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; case scalar_kind_t::bf16_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; - case scalar_kind_t::i8_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; case scalar_kind_t::f16_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; - case scalar_kind_t::f32_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; - case scalar_kind_t::f64_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; + case scalar_kind_t::e5m2_k: + metric_ptr_ = (uptr_t)&equidimensional_>; + break; + case scalar_kind_t::e4m3_k: + metric_ptr_ = (uptr_t)&equidimensional_>; + break; + case scalar_kind_t::e3m2_k: + metric_ptr_ = (uptr_t)&equidimensional_>; + break; + case scalar_kind_t::e2m3_k: + metric_ptr_ = (uptr_t)&equidimensional_>; + break; + case scalar_kind_t::i8_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; + case scalar_kind_t::u8_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; default: metric_ptr_ = 0; break; } break; } case metric_kind_t::haversine_k: { switch (scalar_kind_) { - case scalar_kind_t::bf16_k: metric_ptr_ = 0; break; //< Half-precision 2D vectors are silly. 
- case scalar_kind_t::f16_k: metric_ptr_ = 0; break; //< Half-precision 2D vectors are silly. - case scalar_kind_t::f32_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; case scalar_kind_t::f64_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; + case scalar_kind_t::f32_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; default: metric_ptr_ = 0; break; } break; } case metric_kind_t::divergence_k: { switch (scalar_kind_) { + case scalar_kind_t::f64_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; + case scalar_kind_t::f32_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; case scalar_kind_t::bf16_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; case scalar_kind_t::f16_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; - case scalar_kind_t::f32_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; - case scalar_kind_t::f64_k: metric_ptr_ = (uptr_t)&equidimensional_>; break; + case scalar_kind_t::e5m2_k: + metric_ptr_ = (uptr_t)&equidimensional_>; + break; + case scalar_kind_t::e4m3_k: + metric_ptr_ = (uptr_t)&equidimensional_>; + break; + case scalar_kind_t::e3m2_k: + metric_ptr_ = (uptr_t)&equidimensional_>; + break; + case scalar_kind_t::e2m3_k: + metric_ptr_ = (uptr_t)&equidimensional_>; + break; default: metric_ptr_ = 0; break; } break; @@ -2085,10 +3267,9 @@ class exact_search_t { std::size_t wanted, metric_punned_t const& metric, // executor_at&& executor = executor_at{}, progress_at&& progress = progress_at{}) { return operator()( // - metric, // reinterpret_cast(dataset.data()), dataset.size(), dataset.stride_bytes(), // reinterpret_cast(queries.data()), queries.size(), queries.stride_bytes(), // - wanted, executor, progress); + wanted, metric, executor, progress); } template @@ -2098,9 +3279,8 @@ class exact_search_t { std::size_t wanted, metric_punned_t const& metric, executor_at&& executor = executor_at{}, progress_at&& progress = progress_at{}) { - // Allocate temporary memory to store the distance matrix - // Previous version didn't need 
temporary memory, but the performance was much lower. - // In the new design we keep two buffers - original and transposed, as in-place transpositions + // Allocate temporary memory to store the distance matrix. + // We keep two buffers - original and transposed, as in-place transpositions // of non-rectangular matrixes is expensive. std::size_t tasks_count = dataset_count * queries_count; if (keys_and_distances.size() < tasks_count * 2) @@ -2146,9 +3326,6 @@ class exact_search_t { executor.fixed(queries_count, [&](std::size_t, std::size_t query_idx) { auto start = keys_and_distances_per_query + dataset_count * query_idx; if (wanted > 1) { - // TODO: Consider alternative sorting approaches - // radix_sort(start, start + dataset_count, wanted); - // std::sort(start, start + dataset_count, &smaller_distance); std::partial_sort(start, start + wanted, start + dataset_count, &smaller_distance); } else { auto min_it = std::min_element(start, start + dataset_count, &smaller_distance); @@ -2307,7 +3484,7 @@ template > class kmeans_clustering_ return result.failed("No memory for result outputs!"); std::fill_n(point_to_centroid_index_buffer.data(), points_count, wanted_clusters); - std::fill_n(point_to_centroid_distance_buffer.data(), points_count, std::numeric_limits::max()); + std::fill_n(point_to_centroid_distance_buffer.data(), points_count, (std::numeric_limits::max)()); // Initialize the casting kernel for quantization and export. 
casts_punned_t casts = casts_punned_t::make(quantization_kind); @@ -2353,7 +3530,7 @@ template > class kmeans_clustering_ auto start_time = std::chrono::high_resolution_clock::now(); std::size_t iterations = 0; std::size_t const min_points_shifted_per_iteration = static_cast(min_shifts * points_count); - f64_t last_aggregate_distance = std::numeric_limits::max(); + f64_t last_aggregate_distance = (std::numeric_limits::max)(); while (iterations < max_iterations) { iterations++; @@ -2364,7 +3541,7 @@ template > class kmeans_clustering_ byte_t const* quantized_point = points_quantized_buffer.data() + points_idx * stride_per_vector_quantized; byte_t const* quantized_centroids = centroids_quantized_buffer.data(); - distance_t closest_distance_local = std::numeric_limits::max(); + distance_t closest_distance_local = (std::numeric_limits::max)(); std::size_t closest_idx_local = 0; for (std::size_t centroid_idx = 0; centroid_idx < wanted_clusters; centroid_idx++) { byte_t const* quantized_centroid = quantized_centroids + centroid_idx * stride_per_vector_quantized; @@ -2738,12 +3915,17 @@ class flat_hash_multi_set_gt { equals_t const& equals) : index_(index), parent_(parent), query_(query), equals_(equals) {} - // Pre-increment + // Pre-increment: advance past tombstones and non-matching entries, + // stopping at the next matching live entry or an empty slot. 
equal_iterator_gt& operator++() { do { index_ = (index_ + 1) & (parent_->capacity_slots_ - 1); - } while (!equals_(parent_->slot_ref(index_).element, query_) && - (parent_->slot_ref(index_).header.populated & parent_->slot_ref(index_).mask)); + auto slot = parent_->slot_ref(index_); + bool is_empty = ~slot.header.populated & slot.mask; + bool is_match = !(slot.header.deleted & slot.mask) && equals_(slot.element, query_); + if (is_empty || is_match) + break; + } while (true); return *this; } diff --git a/java/CMakeLists.txt b/java/CMakeLists.txt index 104d79bf1..8cee13ed3 100644 --- a/java/CMakeLists.txt +++ b/java/CMakeLists.txt @@ -6,11 +6,6 @@ find_package(JNI REQUIRED) # Define JNI library sources set(USEARCH_JNI_SOURCES cloud/unum/usearch/cloud_unum_usearch_Index.cpp) -# Add SimSIMD source if enabled -if (USEARCH_USE_SIMSIMD) - list(APPEND USEARCH_JNI_SOURCES ../simsimd/c/lib.c) -endif () - # Build JNI shared library add_library(usearch_jni SHARED ${USEARCH_JNI_SOURCES}) diff --git a/java/README.md b/java/README.md index 0f67d78d8..6fbc84d5e 100644 --- a/java/README.md +++ b/java/README.md @@ -20,7 +20,7 @@ repositories { task downloadUSearchJar { doLast { def usearchVersion = '2.24.0' - def usearchUrl = "https://github.com/unum-cloud/usearch/releases/download/v${usearchVersion}/usearch-${usearchVersion}.jar" + def usearchUrl = "https://github.com/unum-cloud/USearch/releases/download/v${usearchVersion}/usearch-${usearchVersion}.jar" def usearchFile = file("lib/usearch-${usearchVersion}.jar") usearchFile.parentFile.mkdirs() diff --git a/java/cloud/unum/usearch/Index.java b/java/cloud/unum/usearch/Index.java index c15958437..e963cb9a6 100644 --- a/java/cloud/unum/usearch/Index.java +++ b/java/cloud/unum/usearch/Index.java @@ -64,9 +64,9 @@ * management. Always use try-with-resources or explicitly call {@link #close()} * to free native memory.

* - * @see USearch GitHub + * @see USearch GitHub * Repository - * @see USearch + * @see USearch * Documentation */ package cloud.unum.usearch; @@ -139,11 +139,36 @@ public static final class Quantization { */ public static final String FLOAT16 = "f16"; + /** + * FP8 E5M2 (IEEE 754-like, 1 sign + 5 exponent + 2 mantissa) + */ + public static final String E5M2 = "e5m2"; + + /** + * FP8 E4M3 (OCP, 1 sign + 4 exponent + 3 mantissa) + */ + public static final String E4M3 = "e4m3"; + + /** + * FP6 E3M2 (1 sign + 3 exponent + 2 mantissa) + */ + public static final String E3M2 = "e3m2"; + + /** + * FP6 E2M3 (1 sign + 2 exponent + 3 mantissa) + */ + public static final String E2M3 = "e2m3"; + /** * 8-bit integer quantization */ public static final String INT8 = "i8"; + /** + * 8-bit unsigned integer quantization + */ + public static final String UINT8 = "u8"; + /** * Binary quantization (1 bit per dimension, 8 dimensions per word) */ @@ -615,6 +640,48 @@ public int searchInto(java.nio.ByteBuffer query, java.nio.LongBuffer results, lo return found; } + /** + * Adds uint8 quantized vector to index. + * + * @param key vector identifier + * @param vector uint8 quantized vector data (stored as byte[]) + */ + public void addU8(long key, byte vector[]) { + if (c_ptr == 0) { + throw new IllegalStateException("Index already closed"); + } + c_add_u8(c_ptr, key, vector); + } + + /** + * Searches using uint8 quantized query vector. + * + * @param vector uint8 quantized query vector (stored as byte[]) + * @param count number of neighbors to find + * @return array of neighbor keys + */ + public long[] searchU8(byte vector[], long count) { + if (c_ptr == 0) { + throw new IllegalStateException("Index already closed"); + } + return c_search_u8(c_ptr, vector, count); + } + + /** + * Retrieves uint8 vector into provided byte buffer. 
+ * + * @param key vector identifier + * @param buffer buffer to populate with vector data + * @throws IllegalArgumentException if key not found or buffer size + * incorrect + */ + public void getIntoU8(long key, byte[] buffer) { + if (c_ptr == 0) { + throw new IllegalStateException("Index already closed"); + } + c_get_into_u8(c_ptr, key, buffer); + } + /** * Retrieves vector into provided float buffer. * @@ -771,13 +838,31 @@ public String getScalarKind() { return c_scalar_kind(c_ptr); } + /** + * Returns a comma-separated list of SIMD capabilities available on the current platform at runtime. + * + * @return comma-separated string of runtime capability names (e.g., "serial, haswell, skylake") + */ + public static String hardwareAccelerationAvailableString() { + return c_hardware_acceleration_available_string(); + } + + /** + * Returns a comma-separated list of SIMD capabilities compiled into this library build. + * + * @return comma-separated string of compiled capability names + */ + public static String hardwareAccelerationCompiledString() { + return c_hardware_acceleration_compiled_string(); + } + /** * Returns all SIMD capabilities available on the current platform at runtime. 
* * @return array of runtime capability names (e.g., ["serial", "haswell", "skylake", "neon"]) */ public static String[] hardwareAccelerationAvailable() { - return c_hardware_acceleration_available(); + return c_hardware_acceleration_available_string().split(", "); } /** @@ -786,7 +871,7 @@ public static String[] hardwareAccelerationAvailable() { * @return array of compiled capability names based on preprocessor macros */ public static String[] hardwareAccelerationCompiled() { - return c_hardware_acceleration_compiled(); + return c_hardware_acceleration_compiled_string().split(", "); } /** @@ -1068,9 +1153,9 @@ private static native long c_create( private static native String c_scalar_kind(long ptr); - private static native String[] c_hardware_acceleration_available(); + private static native String c_hardware_acceleration_available_string(); - private static native String[] c_hardware_acceleration_compiled(); + private static native String c_hardware_acceleration_compiled_string(); private static native String c_library_version(); @@ -1085,18 +1170,24 @@ private static native long c_create( private static native void c_add_i8(long ptr, long key, byte vector[]); + private static native void c_add_u8(long ptr, long key, byte vector[]); + private static native long[] c_search_f32(long ptr, float vector[], long count); private static native long[] c_search_f64(long ptr, double vector[], long count); private static native long[] c_search_i8(long ptr, byte vector[], long count); + private static native long[] c_search_u8(long ptr, byte vector[], long count); + private static native void c_get_into_f32(long ptr, long key, float buffer[]); private static native void c_get_into_f64(long ptr, long key, double buffer[]); private static native void c_get_into_i8(long ptr, long key, byte buffer[]); + private static native void c_get_into_u8(long ptr, long key, byte buffer[]); + // ByteBuffer overloads for zero-copy operations: private static native void c_add_f32_buffer(long 
ptr, long key, java.nio.FloatBuffer vector); diff --git a/java/cloud/unum/usearch/cloud_unum_usearch_Index.cpp b/java/cloud/unum/usearch/cloud_unum_usearch_Index.cpp index 685bdc8d8..510e896fd 100644 --- a/java/cloud/unum/usearch/cloud_unum_usearch_Index.cpp +++ b/java/cloud/unum/usearch/cloud_unum_usearch_Index.cpp @@ -12,6 +12,7 @@ using namespace unum; using f32_span_t = unum::usearch::span_gt; using f64_span_t = unum::usearch::span_gt; using i8_span_t = unum::usearch::span_gt; +using u8_span_t = unum::usearch::span_gt; static_assert(sizeof(jlong) == sizeof(index_dense_t::vector_key_t)); static inline jsize to_jsize(JNIEnv* env, std::size_t n) { @@ -469,6 +470,92 @@ JNIEXPORT void JNICALL Java_cloud_unum_usearch_Index_c_1get_1into_1i8(JNIEnv* en (*env).ReleaseByteArrayElements(buffer, buffer_data, JNI_COMMIT); } +JNIEXPORT void JNICALL Java_cloud_unum_usearch_Index_c_1add_1u8( // + JNIEnv* env, jclass, jlong c_ptr, jlong key, jbyteArray vector) { + + jbyte* vector_data = (*env).GetByteArrayElements(vector, 0); + jsize vector_length = (*env).GetArrayLength(vector); + + auto index = reinterpret_cast(c_ptr); + size_t dimensions = index->dimensions(); + + using vector_key_t = typename index_dense_t::vector_key_t; + using add_result_t = typename index_dense_t::add_result_t; + + if (vector_length % dimensions != 0) { + (*env).ReleaseByteArrayElements(vector, vector_data, 0); + jclass jc = (*env).FindClass("java/lang/IllegalArgumentException"); + if (jc) + (*env).ThrowNew(jc, "Vector length must be a multiple of dimensions"); + return; + } + + size_t num_vectors = vector_length / dimensions; + for (size_t i = 0; i < num_vectors; i++) { + u8_span_t vector_span = u8_span_t{reinterpret_cast(vector_data + i * dimensions), dimensions}; + add_result_t result = index->add(static_cast(key + i), vector_span); + if (!result) { + (*env).ReleaseByteArrayElements(vector, vector_data, 0); + jclass jc = (*env).FindClass("java/lang/Error"); + if (jc) + (*env).ThrowNew(jc, 
result.error.release()); + return; + } + } + + (*env).ReleaseByteArrayElements(vector, vector_data, 0); +} + +JNIEXPORT jlongArray JNICALL Java_cloud_unum_usearch_Index_c_1search_1u8( // + JNIEnv* env, jclass, jlong c_ptr, jbyteArray vector, jlong wanted) { + + jbyte* vector_data = (*env).GetByteArrayElements(vector, 0); + jsize vector_dims = (*env).GetArrayLength(vector); + u8_span_t vector_span = + u8_span_t{reinterpret_cast(vector_data), static_cast(vector_dims)}; + + using vector_key_t = typename index_dense_t::vector_key_t; + using search_result_t = typename index_dense_t::search_result_t; + + search_result_t result = + reinterpret_cast(c_ptr)->search(vector_span, static_cast(wanted)); + (*env).ReleaseByteArrayElements(vector, vector_data, 0); + + if (result) { + std::size_t found = result.count; + jlongArray matches = (*env).NewLongArray(to_jsize(env, found)); + if (matches == NULL) + return NULL; + + jlong* matches_data = (*env).GetLongArrayElements(matches, 0); + result.dump_to(reinterpret_cast(matches_data)); + (*env).ReleaseLongArrayElements(matches, matches_data, JNI_COMMIT); + + return matches; + } else { + jclass jc = (*env).FindClass("java/lang/Error"); + if (jc) + (*env).ThrowNew(jc, result.error.release()); + return NULL; + } +} + +JNIEXPORT void JNICALL Java_cloud_unum_usearch_Index_c_1get_1into_1u8(JNIEnv* env, jclass, jlong c_ptr, jlong key, + jbyteArray buffer) { + auto index = reinterpret_cast(c_ptr); + jbyte* buffer_data = (*env).GetByteArrayElements(buffer, 0); + + if (index->get(key, reinterpret_cast(buffer_data)) == 0) { + (*env).ReleaseByteArrayElements(buffer, buffer_data, JNI_ABORT); + jclass jc = env->FindClass("java/lang/IllegalArgumentException"); + if (jc) { + env->ThrowNew(jc, "key not found"); + } + return; + } + (*env).ReleaseByteArrayElements(buffer, buffer_data, JNI_COMMIT); +} + JNIEXPORT void JNICALL Java_cloud_unum_usearch_Index_c_1add_1f32_1buffer( // JNIEnv* env, jclass, jlong c_ptr, jlong key, jobject vector_buffer) { @@ 
-873,114 +960,14 @@ JNIEXPORT jint JNICALL Java_cloud_unum_usearch_Index_c_1search_1into_1i8_1buffer } } -JNIEXPORT jobjectArray JNICALL Java_cloud_unum_usearch_Index_c_1hardware_1acceleration_1available(JNIEnv* env, jclass) { -#if USEARCH_USE_SIMSIMD - simsimd_capability_t caps = simsimd_capabilities(); - - // Define capability mappings - struct { - simsimd_capability_t cap; - const char* name; - } capabilities[] = { - {simsimd_cap_serial_k, "serial"}, {simsimd_cap_haswell_k, "haswell"}, - {simsimd_cap_skylake_k, "skylake"}, {simsimd_cap_ice_k, "ice"}, - {simsimd_cap_genoa_k, "genoa"}, {simsimd_cap_sapphire_k, "sapphire"}, - {simsimd_cap_turin_k, "turin"}, {simsimd_cap_sierra_k, "sierra"}, - {simsimd_cap_neon_k, "neon"}, {simsimd_cap_neon_i8_k, "neon_i8"}, - {simsimd_cap_neon_f16_k, "neon_f16"}, {simsimd_cap_neon_bf16_k, "neon_bf16"}, - {simsimd_cap_sve_k, "sve"}, {simsimd_cap_sve_i8_k, "sve_i8"}, - {simsimd_cap_sve_f16_k, "sve_f16"}, {simsimd_cap_sve_bf16_k, "sve_bf16"}, - {simsimd_cap_sve2_k, "sve2"}, {simsimd_cap_sve2p1_k, "sve2p1"}, - }; - int const cap_count = sizeof(capabilities) / sizeof(capabilities[0]); - - // Count supported capabilities - int supported_count = 0; - for (int i = 0; i < cap_count; i++) - if (caps & capabilities[i].cap) - supported_count++; - - // Create Java string array - jclass stringClass = env->FindClass("java/lang/String"); - jobjectArray result = env->NewObjectArray(supported_count, stringClass, nullptr); - - int index = 0; - for (int i = 0; i < cap_count; i++) { - if (caps & capabilities[i].cap) { - jstring capName = env->NewStringUTF(capabilities[i].name); - env->SetObjectArrayElement(result, index++, capName); - env->DeleteLocalRef(capName); - } - } - - return result; -#else - // If SimSIMD is not enabled, return only serial - jclass stringClass = env->FindClass("java/lang/String"); - jobjectArray result = env->NewObjectArray(1, stringClass, nullptr); - jstring serialCap = env->NewStringUTF("serial"); - 
env->SetObjectArrayElement(result, 0, serialCap); - env->DeleteLocalRef(serialCap); - return result; -#endif +JNIEXPORT jstring JNICALL Java_cloud_unum_usearch_Index_c_1hardware_1acceleration_1available_1string(JNIEnv* env, + jclass) { + return env->NewStringUTF(hardware_acceleration_available()); } -JNIEXPORT jobjectArray JNICALL Java_cloud_unum_usearch_Index_c_1hardware_1acceleration_1compiled(JNIEnv* env, jclass) { -#if USEARCH_USE_SIMSIMD - // Define compile-time capabilities based on preprocessor macros - struct { - int compiled; - char const* name; - } compiled_capabilities[] = { - {1, "serial"}, // Always available - {SIMSIMD_TARGET_HASWELL, "haswell"}, - {SIMSIMD_TARGET_SKYLAKE, "skylake"}, - {SIMSIMD_TARGET_ICE, "ice"}, - {SIMSIMD_TARGET_GENOA, "genoa"}, - {SIMSIMD_TARGET_SAPPHIRE, "sapphire"}, - {SIMSIMD_TARGET_TURIN, "turin"}, - {SIMSIMD_TARGET_SIERRA, "sierra"}, - {SIMSIMD_TARGET_NEON, "neon"}, - {SIMSIMD_TARGET_NEON_I8, "neon_i8"}, - {SIMSIMD_TARGET_NEON_F16, "neon_f16"}, - {SIMSIMD_TARGET_NEON_BF16, "neon_bf16"}, - {SIMSIMD_TARGET_SVE, "sve"}, - {SIMSIMD_TARGET_SVE_I8, "sve_i8"}, - {SIMSIMD_TARGET_SVE_F16, "sve_f16"}, - {SIMSIMD_TARGET_SVE_BF16, "sve_bf16"}, - {SIMSIMD_TARGET_SVE2, "sve2"}, - }; - int const cap_count = sizeof(compiled_capabilities) / sizeof(compiled_capabilities[0]); - - // Count compiled capabilities - int compiled_count = 0; - for (int i = 0; i < cap_count; i++) - if (compiled_capabilities[i].compiled) - compiled_count++; - - // Create Java string array - jclass stringClass = env->FindClass("java/lang/String"); - jobjectArray result = env->NewObjectArray(compiled_count, stringClass, nullptr); - - int index = 0; - for (int i = 0; i < cap_count; i++) { - if (compiled_capabilities[i].compiled) { - jstring capName = env->NewStringUTF(compiled_capabilities[i].name); - env->SetObjectArrayElement(result, index++, capName); - env->DeleteLocalRef(capName); - } - } - - return result; -#else - // If SimSIMD is not enabled, return only serial - 
jclass stringClass = env->FindClass("java/lang/String"); - jobjectArray result = env->NewObjectArray(1, stringClass, nullptr); - jstring serialCap = env->NewStringUTF("serial"); - env->SetObjectArrayElement(result, 0, serialCap); - env->DeleteLocalRef(serialCap); - return result; -#endif +JNIEXPORT jstring JNICALL Java_cloud_unum_usearch_Index_c_1hardware_1acceleration_1compiled_1string(JNIEnv* env, + jclass) { + return env->NewStringUTF(hardware_acceleration_compiled()); } JNIEXPORT jstring JNICALL Java_cloud_unum_usearch_Index_c_1library_1version(JNIEnv* env, jclass) { @@ -991,8 +978,8 @@ JNIEXPORT jstring JNICALL Java_cloud_unum_usearch_Index_c_1library_1version(JNIE } JNIEXPORT jboolean JNICALL Java_cloud_unum_usearch_Index_c_1uses_1dynamic_1dispatch(JNIEnv* env, jclass) { -#if USEARCH_USE_SIMSIMD - return simsimd_uses_dynamic_dispatch() ? JNI_TRUE : JNI_FALSE; +#if USEARCH_USE_NUMKONG + return nk_uses_dynamic_dispatch() ? JNI_TRUE : JNI_FALSE; #else return JNI_FALSE; #endif diff --git a/java/cloud/unum/usearch/cloud_unum_usearch_Index.h b/java/cloud/unum/usearch/cloud_unum_usearch_Index.h index 16cbc2110..3cf28bf6e 100644 --- a/java/cloud/unum/usearch/cloud_unum_usearch_Index.h +++ b/java/cloud/unum/usearch/cloud_unum_usearch_Index.h @@ -145,18 +145,18 @@ JNIEXPORT jstring JNICALL Java_cloud_unum_usearch_Index_c_1scalar_1kind /* * Class: cloud_unum_usearch_Index - * Method: c_hardware_acceleration_available - * Signature: ()[Ljava/lang/String; + * Method: c_hardware_acceleration_available_string + * Signature: ()Ljava/lang/String; */ -JNIEXPORT jobjectArray JNICALL Java_cloud_unum_usearch_Index_c_1hardware_1acceleration_1available +JNIEXPORT jstring JNICALL Java_cloud_unum_usearch_Index_c_1hardware_1acceleration_1available_1string (JNIEnv *, jclass); /* * Class: cloud_unum_usearch_Index - * Method: c_hardware_acceleration_compiled - * Signature: ()[Ljava/lang/String; + * Method: c_hardware_acceleration_compiled_string + * Signature: ()Ljava/lang/String; */ 
-JNIEXPORT jobjectArray JNICALL Java_cloud_unum_usearch_Index_c_1hardware_1acceleration_1compiled +JNIEXPORT jstring JNICALL Java_cloud_unum_usearch_Index_c_1hardware_1acceleration_1compiled_1string (JNIEnv *, jclass); /* @@ -207,6 +207,14 @@ JNIEXPORT void JNICALL Java_cloud_unum_usearch_Index_c_1add_1f64 JNIEXPORT void JNICALL Java_cloud_unum_usearch_Index_c_1add_1i8 (JNIEnv *, jclass, jlong, jlong, jbyteArray); +/* + * Class: cloud_unum_usearch_Index + * Method: c_add_u8 + * Signature: (JJ[B)V + */ +JNIEXPORT void JNICALL Java_cloud_unum_usearch_Index_c_1add_1u8 + (JNIEnv *, jclass, jlong, jlong, jbyteArray); + /* * Class: cloud_unum_usearch_Index * Method: c_search_f32 @@ -231,6 +239,14 @@ JNIEXPORT jlongArray JNICALL Java_cloud_unum_usearch_Index_c_1search_1f64 JNIEXPORT jlongArray JNICALL Java_cloud_unum_usearch_Index_c_1search_1i8 (JNIEnv *, jclass, jlong, jbyteArray, jlong); +/* + * Class: cloud_unum_usearch_Index + * Method: c_search_u8 + * Signature: (J[BJ)[J + */ +JNIEXPORT jlongArray JNICALL Java_cloud_unum_usearch_Index_c_1search_1u8 + (JNIEnv *, jclass, jlong, jbyteArray, jlong); + /* * Class: cloud_unum_usearch_Index * Method: c_get_into_f32 @@ -255,6 +271,14 @@ JNIEXPORT void JNICALL Java_cloud_unum_usearch_Index_c_1get_1into_1f64 JNIEXPORT void JNICALL Java_cloud_unum_usearch_Index_c_1get_1into_1i8 (JNIEnv *, jclass, jlong, jlong, jbyteArray); +/* + * Class: cloud_unum_usearch_Index + * Method: c_get_into_u8 + * Signature: (JJ[B)V + */ +JNIEXPORT void JNICALL Java_cloud_unum_usearch_Index_c_1get_1into_1u8 + (JNIEnv *, jclass, jlong, jlong, jbyteArray); + /* * Class: cloud_unum_usearch_Index * Method: c_add_f32_buffer diff --git a/javascript/lib.cpp b/javascript/lib.cpp index e62a874b0..8d87a1430 100644 --- a/javascript/lib.cpp +++ b/javascript/lib.cpp @@ -432,7 +432,24 @@ Napi::Value exactSearch(Napi::CallbackInfo const& ctx) { return result_js; } +Napi::Value version(Napi::CallbackInfo const& info) { + static char buf[32]; + std::snprintf(buf, 
sizeof(buf), "%d.%d.%d", USEARCH_VERSION_MAJOR, USEARCH_VERSION_MINOR, USEARCH_VERSION_PATCH); + return Napi::String::New(info.Env(), buf); +} + +Napi::Value hardwareAccelerationCompiled(Napi::CallbackInfo const& info) { + return Napi::String::New(info.Env(), unum::usearch::hardware_acceleration_compiled()); +} + +Napi::Value hardwareAccelerationAvailable(Napi::CallbackInfo const& info) { + return Napi::String::New(info.Env(), unum::usearch::hardware_acceleration_available()); +} + Napi::Object InitAll(Napi::Env env, Napi::Object exports) { + exports.Set("version", Napi::Function::New(env, version)); + exports.Set("hardwareAccelerationCompiled", Napi::Function::New(env, hardwareAccelerationCompiled)); + exports.Set("hardwareAccelerationAvailable", Napi::Function::New(env, hardwareAccelerationAvailable)); exports.Set("exactSearch", Napi::Function::New(env, exactSearch)); return CompiledIndex::Init(env, exports); } diff --git a/javascript/usearch.ts b/javascript/usearch.ts index 109a07bd3..e921d3c7c 100644 --- a/javascript/usearch.ts +++ b/javascript/usearch.ts @@ -5,7 +5,7 @@ import { getFileName, getRoot } from "bindings"; const compiled: Compiled = build(getBuildDir(getDirName())); -type Vector = Float32Array | Float64Array | Int8Array; +type Vector = Float32Array | Float64Array | Int8Array | Uint8Array; type Matrix = Vector[]; type VectorOrMatrix = Vector | Matrix; @@ -68,11 +68,16 @@ export enum MetricKind { */ export enum ScalarKind { Unknown = "unknown", - F32 = "f32", F64 = "f64", - F16 = "f16", + F32 = "f32", BF16 = "bf16", + F16 = "f16", + E5M2 = "e5m2", + E4M3 = "e4m3", + E3M2 = "e3m2", + E2M3 = "e2m3", I8 = "i8", + U8 = "u8", B1 = "b1", } @@ -167,7 +172,8 @@ function isVector(vectors: unknown) { return ( vectors instanceof Float32Array || vectors instanceof Float64Array || - vectors instanceof Int8Array + vectors instanceof Int8Array || + vectors instanceof Uint8Array ); } @@ -541,7 +547,8 @@ export class Index { type NumberArrayConstructor = | 
Float64ArrayConstructor | Float32ArrayConstructor - | Int8ArrayConstructor; + | Int8ArrayConstructor + | Uint8ArrayConstructor; /** * Performs an exact search on the given dataset to find the best matching vectors for each query. * @@ -606,7 +613,8 @@ export function exactSearch( let targetType: NumberArrayConstructor; if (dataset instanceof Float64Array) targetType = Float64Array; else if (dataset instanceof Int8Array) targetType = Int8Array; - else targetType = Float32Array; // default to Float32Array if dataset is not Float64Array or Int8Array + else if (dataset instanceof Uint8Array) targetType = Uint8Array; + else targetType = Float32Array; // default to Float32Array if dataset is not Float64Array, Int8Array, or Uint8Array dataset = normalizeVectors(dataset, dimensions, targetType); queries = normalizeVectors(queries, dimensions, targetType); diff --git a/numkong b/numkong new file mode 160000 index 000000000..14daf4005 --- /dev/null +++ b/numkong @@ -0,0 +1 @@ +Subproject commit 14daf40059045660775ec39d161f2b51e0a0f2dc diff --git a/objc/README.md b/objc/README.md index 6e14836a3..4ccba31cb 100644 --- a/objc/README.md +++ b/objc/README.md @@ -10,7 +10,7 @@ To install it, simply add the following line to your `Package.swift`: ```swift dependencies: [ - .package(url: "https://github.com/unum-cloud/usearch", .upToNextMajor(from: "2.0.0")) + .package(url: "https://github.com/unum-cloud/USearch", .upToNextMajor(from: "2.0.0")) ] ``` diff --git a/objc/USearchObjective.mm b/objc/USearchObjective.mm index c8b364d86..819d729f4 100644 --- a/objc/USearchObjective.mm +++ b/objc/USearchObjective.mm @@ -55,20 +55,38 @@ metric_kind_t to_native_metric(USearchMetric m) { scalar_kind_t to_native_scalar(USearchScalar m) { switch (m) { - case USearchScalarI8: - return scalar_kind_t::i8_k; + case USearchScalarF64: + return scalar_kind_t::f64_k; - case USearchScalarF16: - return scalar_kind_t::f16_k; + case USearchScalarF32: + return scalar_kind_t::f32_k; case USearchScalarBF16: 
return scalar_kind_t::bf16_k; - case USearchScalarF32: - return scalar_kind_t::f32_k; + case USearchScalarF16: + return scalar_kind_t::f16_k; - case USearchScalarF64: - return scalar_kind_t::f64_k; + case USearchScalarE5M2: + return scalar_kind_t::e5m2_k; + + case USearchScalarE4M3: + return scalar_kind_t::e4m3_k; + + case USearchScalarE3M2: + return scalar_kind_t::e3m2_k; + + case USearchScalarE2M3: + return scalar_kind_t::e2m3_k; + + case USearchScalarI8: + return scalar_kind_t::i8_k; + + case USearchScalarU8: + return scalar_kind_t::u8_k; + + case USearchScalarB1: + return scalar_kind_t::b1x8_k; default: return scalar_kind_t::unknown_k; @@ -283,6 +301,57 @@ - (UInt32)getHalf:(USearchKey)key return static_cast(result); } +- (void)addU8:(USearchKey)key + vector:(uint8_t const *_Nonnull)vector { + add_result_t result = _native->add(key, (u8_t const *)vector); + + if (!result) { + @throw [NSException exceptionWithName:@"Can't add to index" + reason:[NSString stringWithUTF8String:result.error.release()] + userInfo:nil]; + } +} + +- (UInt32)searchU8:(uint8_t const *_Nonnull)vector + count:(UInt32)wanted + keys:(USearchKey *_Nullable)keys + distances:(Float32 *_Nullable)distances { + search_result_t result = _native->search((u8_t const *)vector, static_cast(wanted)); + + if (!result) { + @throw [NSException exceptionWithName:@"Can't find in index" + reason:[NSString stringWithUTF8String:result.error.release()] + userInfo:nil]; + } + + std::size_t found = result.dump_to(keys, distances); + return static_cast(found); +} + +- (UInt32)getU8:(USearchKey)key + vector:(void *_Nonnull)vector + count:(UInt32)wanted { + std::size_t result = _native->get(key, (u8_t*)vector, static_cast(wanted)); + return static_cast(result); +} + +- (UInt32)filteredSearchU8:(uint8_t const *_Nonnull)vector + count:(UInt32)wanted + filter:(USearchFilterFn)predicate + keys:(USearchKey *_Nullable)keys + distances:(Float32 *_Nullable)distances { + search_result_t result = 
_native->filtered_search((u8_t const *) vector, static_cast(wanted), predicate); + + if (!result) { + @throw [NSException exceptionWithName:@"Can't find in index" + reason:[NSString stringWithUTF8String:result.error.release()] + userInfo:nil]; + } + + std::size_t found = result.dump_to(keys, distances); + return static_cast(found); +} + - (void)clear { _native->clear(); } diff --git a/objc/include/USearchObjective.h b/objc/include/USearchObjective.h index 316067740..4aa71f2f8 100644 --- a/objc/include/USearchObjective.h +++ b/objc/include/USearchObjective.h @@ -5,12 +5,17 @@ NS_ASSUME_NONNULL_BEGIN typedef NS_ENUM(NSUInteger, USearchScalar) { + USearchScalarF64, USearchScalarF32, + USearchScalarBF16, USearchScalarF16, - USearchScalarF64, + USearchScalarE5M2, + USearchScalarE4M3, + USearchScalarE3M2, + USearchScalarE2M3, USearchScalarI8, + USearchScalarU8, USearchScalarB1, - USearchScalarBF16, }; typedef NS_ENUM(NSUInteger, USearchMetric) { @@ -197,6 +202,51 @@ API_AVAILABLE(ios(13.0), macos(10.15), tvos(13.0), watchos(6.0)) vector:(void *_Nonnull)vector count:(UInt32)count NS_SWIFT_NAME(getHalf(key:vector:count:)); +/** + * @brief Adds a labeled vector to the index. + * @param vector Uint8 vector. + */ +- (void)addU8:(USearchKey)key + vector:(uint8_t const *_Nonnull)vector NS_SWIFT_NAME(addU8(key:vector:)); + +/** + * @brief Approximate nearest neighbors search. + * @param vector Uint8 query vector. + * @param count Upper limit on the number of matches to retrieve. + * @param keys Optional output buffer for keys of approximate neighbors. + * @param distances Optional output buffer for (increasing) distances to approximate neighbors. + * @return Number of matches exported to `keys` and `distances`. 
+ */ +- (UInt32)searchU8:(uint8_t const *_Nonnull)vector + count:(UInt32)count + keys:(USearchKey *_Nullable)keys + distances:(Float32 *_Nullable)distances NS_SWIFT_NAME(searchU8(vector:count:keys:distances:)); + +/** + * @brief Retrieves a labeled uint8 vector from the index. + * @param vector A buffer to store the vector. + * @param count For multi-indexes, the number of vectors to retrieve. + * @return Number of vectors exported to `vector`. + */ +- (UInt32)getU8:(USearchKey)key + vector:(void *_Nonnull)vector + count:(UInt32)count NS_SWIFT_NAME(getU8(key:vector:count:)); + +/** + * @brief Approximate nearest neighbors search with filtering. + * @param vector Uint8 query vector. + * @param count Upper limit on the number of matches to retrieve. + * @param filter Closure called for each key, determining whether to include or skip key in the results. + * @param keys Optional output buffer for keys of approximate neighbors. + * @param distances Optional output buffer for (increasing) distances to approximate neighbors. + * @return Number of matches exported to `keys` and `distances`. 
+ */ +- (UInt32)filteredSearchU8:(uint8_t const *_Nonnull)vector + count:(UInt32)wanted + filter:(USearchFilterFn)predicate + keys:(USearchKey *_Nullable)keys + distances:(Float32 *_Nullable)distances NS_SWIFT_NAME(filteredSearchU8(vector:count:filter:keys:distances:)); + - (Boolean)contains:(USearchKey)key NS_SWIFT_NAME(contains(key:)); - (UInt32)count:(USearchKey)key NS_SWIFT_NAME(count(key:)); diff --git a/package-lock.json b/package-lock.json deleted file mode 100644 index a73f97127..000000000 --- a/package-lock.json +++ /dev/null @@ -1,1615 +0,0 @@ -{ - "name": "usearch", - "version": "2.24.0", - "lockfileVersion": 3, - "requires": true, - "packages": { - "": { - "name": "usearch", - "version": "2.21.3", - "hasInstallScript": true, - "license": "Apache 2.0", - "dependencies": { - "bindings": "^1.5.0", - "node-addon-api": "^8.5.0", - "node-gyp-build": "^4.8.4" - }, - "devDependencies": { - "@types/bindings": "^1.5.5", - "@types/node": "^24.3.0", - "node-gyp": "^11.4.2", - "prebuildify": "^6.0.1", - "typescript": "^5.9.2" - }, - "engines": { - "node": "~10 >=10.20 || >=12.17" - } - }, - "node_modules/@isaacs/cliui": { - "version": "8.0.2", - "resolved": "https://registry.npmjs.org/@isaacs/cliui/-/cliui-8.0.2.tgz", - "integrity": "sha512-O8jcjabXaleOG9DQ0+ARXWZBTfnP4WNAqzuiJK7ll44AmxGKv/J2M4TPjxjY3znBCfvBXFzucm1twdyFybFqEA==", - "dev": true, - "license": "ISC", - "dependencies": { - "string-width": "^5.1.2", - "string-width-cjs": "npm:string-width@^4.2.0", - "strip-ansi": "^7.0.1", - "strip-ansi-cjs": "npm:strip-ansi@^6.0.1", - "wrap-ansi": "^8.1.0", - "wrap-ansi-cjs": "npm:wrap-ansi@^7.0.0" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/@isaacs/fs-minipass": { - "version": "4.0.1", - "resolved": "https://registry.npmjs.org/@isaacs/fs-minipass/-/fs-minipass-4.0.1.tgz", - "integrity": "sha512-wgm9Ehl2jpeqP3zw/7mo3kRHFp5MEDhqAdwy1fTGkHAwnkGOVsgpvQhL8B5n1qlb01jV3n/bI0ZfZp5lWA1k4w==", - "dev": true, - "license": "ISC", - "dependencies": { - "minipass": 
"^7.0.4" - }, - "engines": { - "node": ">=18.0.0" - } - }, - "node_modules/@npmcli/agent": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/@npmcli/agent/-/agent-3.0.0.tgz", - "integrity": "sha512-S79NdEgDQd/NGCay6TCoVzXSj74skRZIKJcpJjC5lOq34SZzyI6MqtiiWoiVWoVrTcGjNeC4ipbh1VIHlpfF5Q==", - "dev": true, - "license": "ISC", - "dependencies": { - "agent-base": "^7.1.0", - "http-proxy-agent": "^7.0.0", - "https-proxy-agent": "^7.0.1", - "lru-cache": "^10.0.1", - "socks-proxy-agent": "^8.0.3" - }, - "engines": { - "node": "^18.17.0 || >=20.5.0" - } - }, - "node_modules/@npmcli/agent/node_modules/lru-cache": { - "version": "10.4.3", - "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-10.4.3.tgz", - "integrity": "sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ==", - "dev": true, - "license": "ISC" - }, - "node_modules/@npmcli/fs": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/@npmcli/fs/-/fs-4.0.0.tgz", - "integrity": "sha512-/xGlezI6xfGO9NwuJlnwz/K14qD1kCSAGtacBHnGzeAIuJGazcp45KP5NuyARXoKb7cwulAGWVsbeSxdG/cb0Q==", - "dev": true, - "license": "ISC", - "dependencies": { - "semver": "^7.3.5" - }, - "engines": { - "node": "^18.17.0 || >=20.5.0" - } - }, - "node_modules/@pkgjs/parseargs": { - "version": "0.11.0", - "resolved": "https://registry.npmjs.org/@pkgjs/parseargs/-/parseargs-0.11.0.tgz", - "integrity": "sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==", - "dev": true, - "license": "MIT", - "optional": true, - "engines": { - "node": ">=14" - } - }, - "node_modules/@types/bindings": { - "version": "1.5.5", - "resolved": "https://registry.npmjs.org/@types/bindings/-/bindings-1.5.5.tgz", - "integrity": "sha512-y59PRZBTo2/HuN94qRjyJD+465vGoXMsqz9MMJDbtJL9oT5/B+tAL6c3k10epIinC2/BBkLqKzKC6keukl8wdQ==", - "dev": true, - "dependencies": { - "@types/node": "*" - } - }, - "node_modules/@types/node": { - "version": "24.3.0", - "resolved": 
"https://registry.npmjs.org/@types/node/-/node-24.3.0.tgz", - "integrity": "sha512-aPTXCrfwnDLj4VvXrm+UUCQjNEvJgNA8s5F1cvwQU+3KNltTOkBm1j30uNLyqqPNe7gE3KFzImYoZEfLhp4Yow==", - "dev": true, - "license": "MIT", - "dependencies": { - "undici-types": "~7.10.0" - } - }, - "node_modules/abbrev": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/abbrev/-/abbrev-3.0.1.tgz", - "integrity": "sha512-AO2ac6pjRB3SJmGJo+v5/aK6Omggp6fsLrs6wN9bd35ulu4cCwaAU9+7ZhXjeqHVkaHThLuzH0nZr0YpCDhygg==", - "dev": true, - "license": "ISC", - "engines": { - "node": "^18.17.0 || >=20.5.0" - } - }, - "node_modules/agent-base": { - "version": "7.1.4", - "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.4.tgz", - "integrity": "sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">= 14" - } - }, - "node_modules/ansi-regex": { - "version": "6.2.0", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.2.0.tgz", - "integrity": "sha512-TKY5pyBkHyADOPYlRT9Lx6F544mPl0vS5Ew7BJ45hA08Q+t3GjbueLliBWN3sMICk6+y7HdyxSzC4bWS8baBdg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/chalk/ansi-regex?sponsor=1" - } - }, - "node_modules/ansi-styles": { - "version": "6.2.1", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-6.2.1.tgz", - "integrity": "sha512-bN798gFfQX+viw3R7yrGWRqnrN2oRkEkUjjl4JNn4E8GxxbjtG3FbrEIIY3l8/hrwUwIeCZvi4QuOTP4MErVug==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/chalk/ansi-styles?sponsor=1" - } - }, - "node_modules/balanced-match": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", - "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", - "dev": true, - "license": 
"MIT" - }, - "node_modules/base64-js": { - "version": "1.5.1", - "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz", - "integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==", - "dev": true, - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ] - }, - "node_modules/bindings": { - "version": "1.5.0", - "resolved": "https://registry.npmjs.org/bindings/-/bindings-1.5.0.tgz", - "integrity": "sha512-p2q/t/mhvuOj/UeLlV6566GD/guowlr0hHxClI0W9m7MWYkL1F0hLo+0Aexs9HSPCtR1SXQ0TD3MMKrXZajbiQ==", - "dependencies": { - "file-uri-to-path": "1.0.0" - } - }, - "node_modules/bl": { - "version": "4.1.0", - "resolved": "https://registry.npmjs.org/bl/-/bl-4.1.0.tgz", - "integrity": "sha512-1W07cM9gS6DcLperZfFSj+bWLtaPGSOHWhPiGzXmvVJbRLdG82sH/Kn8EtW1VqWVA54AKf2h5k5BbnIbwF3h6w==", - "dev": true, - "dependencies": { - "buffer": "^5.5.0", - "inherits": "^2.0.4", - "readable-stream": "^3.4.0" - } - }, - "node_modules/brace-expansion": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz", - "integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "balanced-match": "^1.0.0" - } - }, - "node_modules/buffer": { - "version": "5.7.1", - "resolved": "https://registry.npmjs.org/buffer/-/buffer-5.7.1.tgz", - "integrity": "sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ==", - "dev": true, - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ], - 
"dependencies": { - "base64-js": "^1.3.1", - "ieee754": "^1.1.13" - } - }, - "node_modules/cacache": { - "version": "19.0.1", - "resolved": "https://registry.npmjs.org/cacache/-/cacache-19.0.1.tgz", - "integrity": "sha512-hdsUxulXCi5STId78vRVYEtDAjq99ICAUktLTeTYsLoTE6Z8dS0c8pWNCxwdrk9YfJeobDZc2Y186hD/5ZQgFQ==", - "dev": true, - "license": "ISC", - "dependencies": { - "@npmcli/fs": "^4.0.0", - "fs-minipass": "^3.0.0", - "glob": "^10.2.2", - "lru-cache": "^10.0.1", - "minipass": "^7.0.3", - "minipass-collect": "^2.0.1", - "minipass-flush": "^1.0.5", - "minipass-pipeline": "^1.2.4", - "p-map": "^7.0.2", - "ssri": "^12.0.0", - "tar": "^7.4.3", - "unique-filename": "^4.0.0" - }, - "engines": { - "node": "^18.17.0 || >=20.5.0" - } - }, - "node_modules/cacache/node_modules/lru-cache": { - "version": "10.4.3", - "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-10.4.3.tgz", - "integrity": "sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ==", - "dev": true, - "license": "ISC" - }, - "node_modules/chownr": { - "version": "1.1.4", - "resolved": "https://registry.npmjs.org/chownr/-/chownr-1.1.4.tgz", - "integrity": "sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg==", - "dev": true - }, - "node_modules/color-convert": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", - "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "color-name": "~1.1.4" - }, - "engines": { - "node": ">=7.0.0" - } - }, - "node_modules/color-name": { - "version": "1.1.4", - "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", - "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", - "dev": true, - "license": "MIT" - }, - "node_modules/cross-spawn": { - 
"version": "7.0.6", - "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz", - "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==", - "dev": true, - "license": "MIT", - "dependencies": { - "path-key": "^3.1.0", - "shebang-command": "^2.0.0", - "which": "^2.0.1" - }, - "engines": { - "node": ">= 8" - } - }, - "node_modules/cross-spawn/node_modules/isexe": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", - "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==", - "dev": true, - "license": "ISC" - }, - "node_modules/cross-spawn/node_modules/which": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", - "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", - "dev": true, - "license": "ISC", - "dependencies": { - "isexe": "^2.0.0" - }, - "bin": { - "node-which": "bin/node-which" - }, - "engines": { - "node": ">= 8" - } - }, - "node_modules/debug": { - "version": "4.4.1", - "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.1.tgz", - "integrity": "sha512-KcKCqiftBJcZr++7ykoDIEwSa3XWowTfNPo92BYxjXiyYEVrUQh2aLyhxBCwww+heortUFxEJYcRzosstTEBYQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "ms": "^2.1.3" - }, - "engines": { - "node": ">=6.0" - }, - "peerDependenciesMeta": { - "supports-color": { - "optional": true - } - } - }, - "node_modules/eastasianwidth": { - "version": "0.2.0", - "resolved": "https://registry.npmjs.org/eastasianwidth/-/eastasianwidth-0.2.0.tgz", - "integrity": "sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA==", - "dev": true, - "license": "MIT" - }, - "node_modules/emoji-regex": { - "version": "9.2.2", - "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-9.2.2.tgz", - "integrity": 
"sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg==", - "dev": true, - "license": "MIT" - }, - "node_modules/encoding": { - "version": "0.1.13", - "resolved": "https://registry.npmjs.org/encoding/-/encoding-0.1.13.tgz", - "integrity": "sha512-ETBauow1T35Y/WZMkio9jiM0Z5xjHHmJ4XmjZOq1l/dXz3lr2sRn87nJy20RupqSh1F2m3HHPSp8ShIPQJrJ3A==", - "dev": true, - "license": "MIT", - "optional": true, - "dependencies": { - "iconv-lite": "^0.6.2" - } - }, - "node_modules/end-of-stream": { - "version": "1.4.4", - "resolved": "https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.4.tgz", - "integrity": "sha512-+uw1inIHVPQoaVuHzRyXd21icM+cnt4CzD5rW+NC1wjOUSTOs+Te7FOv7AhN7vS9x/oIyhLP5PR1H+phQAHu5Q==", - "dev": true, - "dependencies": { - "once": "^1.4.0" - } - }, - "node_modules/env-paths": { - "version": "2.2.1", - "resolved": "https://registry.npmjs.org/env-paths/-/env-paths-2.2.1.tgz", - "integrity": "sha512-+h1lkLKhZMTYjog1VEpJNG7NZJWcuc2DDk/qsqSTRRCOXiLjeQ1d1/udrUGhqMxUgAlwKNZ0cf2uqan5GLuS2A==", - "dev": true, - "engines": { - "node": ">=6" - } - }, - "node_modules/err-code": { - "version": "2.0.3", - "resolved": "https://registry.npmjs.org/err-code/-/err-code-2.0.3.tgz", - "integrity": "sha512-2bmlRpNKBxT/CRmPOlyISQpNj+qSeYvcym/uT0Jx2bMOlKLtSy1ZmLuVxSEKKyor/N5yhvp/ZiG1oE3DEYMSFA==", - "dev": true, - "license": "MIT" - }, - "node_modules/exponential-backoff": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/exponential-backoff/-/exponential-backoff-3.1.1.tgz", - "integrity": "sha512-dX7e/LHVJ6W3DE1MHWi9S1EYzDESENfLrYohG2G++ovZrYOkm4Knwa0mc1cn84xJOR4KEU0WSchhLbd0UklbHw==", - "dev": true - }, - "node_modules/fdir": { - "version": "6.5.0", - "resolved": "https://registry.npmjs.org/fdir/-/fdir-6.5.0.tgz", - "integrity": "sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=12.0.0" - }, - "peerDependencies": { - 
"picomatch": "^3 || ^4" - }, - "peerDependenciesMeta": { - "picomatch": { - "optional": true - } - } - }, - "node_modules/file-uri-to-path": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/file-uri-to-path/-/file-uri-to-path-1.0.0.tgz", - "integrity": "sha512-0Zt+s3L7Vf1biwWZ29aARiVYLx7iMGnEUl9x33fbB/j3jR81u/O2LbqK+Bm1CDSNDKVtJ/YjwY7TUd5SkeLQLw==" - }, - "node_modules/foreground-child": { - "version": "3.3.1", - "resolved": "https://registry.npmjs.org/foreground-child/-/foreground-child-3.3.1.tgz", - "integrity": "sha512-gIXjKqtFuWEgzFRJA9WCQeSJLZDjgJUOMCMzxtvFq/37KojM1BFGufqsCy0r4qSQmYLsZYMeyRqzIWOMup03sw==", - "dev": true, - "license": "ISC", - "dependencies": { - "cross-spawn": "^7.0.6", - "signal-exit": "^4.0.1" - }, - "engines": { - "node": ">=14" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, - "node_modules/fs-constants": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/fs-constants/-/fs-constants-1.0.0.tgz", - "integrity": "sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow==", - "dev": true - }, - "node_modules/fs-minipass": { - "version": "3.0.3", - "resolved": "https://registry.npmjs.org/fs-minipass/-/fs-minipass-3.0.3.tgz", - "integrity": "sha512-XUBA9XClHbnJWSfBzjkm6RvPsyg3sryZt06BEQoXcF7EK/xpGaQYJgQKDJSUH5SGZ76Y7pFx1QBnXz09rU5Fbw==", - "dev": true, - "license": "ISC", - "dependencies": { - "minipass": "^7.0.3" - }, - "engines": { - "node": "^14.17.0 || ^16.13.0 || >=18.0.0" - } - }, - "node_modules/glob": { - "version": "10.4.5", - "resolved": "https://registry.npmjs.org/glob/-/glob-10.4.5.tgz", - "integrity": "sha512-7Bv8RF0k6xjo7d4A/PxYLbUCfb6c+Vpd2/mB2yRDlew7Jb5hEXiCD9ibfO7wpk8i4sevK6DFny9h7EYbM3/sHg==", - "dev": true, - "license": "ISC", - "dependencies": { - "foreground-child": "^3.1.0", - "jackspeak": "^3.1.2", - "minimatch": "^9.0.4", - "minipass": "^7.1.2", - "package-json-from-dist": "^1.0.0", - "path-scurry": "^1.11.1" - }, - "bin": { - 
"glob": "dist/esm/bin.mjs" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, - "node_modules/graceful-fs": { - "version": "4.2.11", - "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz", - "integrity": "sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==", - "dev": true - }, - "node_modules/http-cache-semantics": { - "version": "4.2.0", - "resolved": "https://registry.npmjs.org/http-cache-semantics/-/http-cache-semantics-4.2.0.tgz", - "integrity": "sha512-dTxcvPXqPvXBQpq5dUr6mEMJX4oIEFv6bwom3FDwKRDsuIjjJGANqhBuoAn9c1RQJIdAKav33ED65E2ys+87QQ==", - "dev": true, - "license": "BSD-2-Clause" - }, - "node_modules/http-proxy-agent": { - "version": "7.0.2", - "resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.2.tgz", - "integrity": "sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig==", - "dev": true, - "license": "MIT", - "dependencies": { - "agent-base": "^7.1.0", - "debug": "^4.3.4" - }, - "engines": { - "node": ">= 14" - } - }, - "node_modules/https-proxy-agent": { - "version": "7.0.6", - "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.6.tgz", - "integrity": "sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==", - "dev": true, - "license": "MIT", - "dependencies": { - "agent-base": "^7.1.2", - "debug": "4" - }, - "engines": { - "node": ">= 14" - } - }, - "node_modules/iconv-lite": { - "version": "0.6.3", - "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz", - "integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==", - "dev": true, - "license": "MIT", - "optional": true, - "dependencies": { - "safer-buffer": ">= 2.1.2 < 3.0.0" - }, - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/ieee754": { - "version": "1.2.1", - "resolved": 
"https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz", - "integrity": "sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==", - "dev": true, - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ] - }, - "node_modules/imurmurhash": { - "version": "0.1.4", - "resolved": "https://registry.npmjs.org/imurmurhash/-/imurmurhash-0.1.4.tgz", - "integrity": "sha512-JmXMZ6wuvDmLiHEml9ykzqO6lwFbof0GG4IkcGaENdCRDDmMVnny7s5HsIgHCbaq0w2MyPhDqkhTUgS2LU2PHA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=0.8.19" - } - }, - "node_modules/inherits": { - "version": "2.0.4", - "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", - "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", - "dev": true - }, - "node_modules/ip-address": { - "version": "10.0.1", - "resolved": "https://registry.npmjs.org/ip-address/-/ip-address-10.0.1.tgz", - "integrity": "sha512-NWv9YLW4PoW2B7xtzaS3NCot75m6nK7Icdv0o3lfMceJVRfSoQwqD4wEH5rLwoKJwUiZ/rfpiVBhnaF0FK4HoA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">= 12" - } - }, - "node_modules/is-fullwidth-code-point": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", - "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/isexe": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/isexe/-/isexe-3.1.1.tgz", - "integrity": "sha512-LpB/54B+/2J5hqQ7imZHfdU31OlgQqx7ZicVlkm9kzg9/w8GKLEcFfJl/t7DCEDueOyBAD6zCCwTO6Fzs0NoEQ==", - "dev": true, - "license": "ISC", - "engines": { - "node": ">=16" - } - 
}, - "node_modules/jackspeak": { - "version": "3.4.3", - "resolved": "https://registry.npmjs.org/jackspeak/-/jackspeak-3.4.3.tgz", - "integrity": "sha512-OGlZQpz2yfahA/Rd1Y8Cd9SIEsqvXkLVoSw/cgwhnhFMDbsQFeZYoJJ7bIZBS9BcamUW96asq/npPWugM+RQBw==", - "dev": true, - "license": "BlueOak-1.0.0", - "dependencies": { - "@isaacs/cliui": "^8.0.2" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - }, - "optionalDependencies": { - "@pkgjs/parseargs": "^0.11.0" - } - }, - "node_modules/lru-cache": { - "version": "6.0.0", - "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-6.0.0.tgz", - "integrity": "sha512-Jo6dJ04CmSjuznwJSS3pUeWmd/H0ffTlkXXgwZi+eq1UCmqQwCh+eLsYOYCwY991i2Fah4h1BEMCx4qThGbsiA==", - "dev": true, - "dependencies": { - "yallist": "^4.0.0" - }, - "engines": { - "node": ">=10" - } - }, - "node_modules/make-fetch-happen": { - "version": "14.0.3", - "resolved": "https://registry.npmjs.org/make-fetch-happen/-/make-fetch-happen-14.0.3.tgz", - "integrity": "sha512-QMjGbFTP0blj97EeidG5hk/QhKQ3T4ICckQGLgz38QF7Vgbk6e6FTARN8KhKxyBbWn8R0HU+bnw8aSoFPD4qtQ==", - "dev": true, - "license": "ISC", - "dependencies": { - "@npmcli/agent": "^3.0.0", - "cacache": "^19.0.1", - "http-cache-semantics": "^4.1.1", - "minipass": "^7.0.2", - "minipass-fetch": "^4.0.0", - "minipass-flush": "^1.0.5", - "minipass-pipeline": "^1.2.4", - "negotiator": "^1.0.0", - "proc-log": "^5.0.0", - "promise-retry": "^2.0.1", - "ssri": "^12.0.0" - }, - "engines": { - "node": "^18.17.0 || >=20.5.0" - } - }, - "node_modules/minimatch": { - "version": "9.0.5", - "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.5.tgz", - "integrity": "sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==", - "dev": true, - "license": "ISC", - "dependencies": { - "brace-expansion": "^2.0.1" - }, - "engines": { - "node": ">=16 || 14 >=14.17" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, - "node_modules/minimist": { - 
"version": "1.2.8", - "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz", - "integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==", - "dev": true, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/minipass": { - "version": "7.1.2", - "resolved": "https://registry.npmjs.org/minipass/-/minipass-7.1.2.tgz", - "integrity": "sha512-qOOzS1cBTWYF4BH8fVePDBOO9iptMnGUEZwNc/cMWnTV2nVLZ7VoNWEPHkYczZA0pdoA7dl6e7FL659nX9S2aw==", - "dev": true, - "license": "ISC", - "engines": { - "node": ">=16 || 14 >=14.17" - } - }, - "node_modules/minipass-collect": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/minipass-collect/-/minipass-collect-2.0.1.tgz", - "integrity": "sha512-D7V8PO9oaz7PWGLbCACuI1qEOsq7UKfLotx/C0Aet43fCUB/wfQ7DYeq2oR/svFJGYDHPr38SHATeaj/ZoKHKw==", - "dev": true, - "license": "ISC", - "dependencies": { - "minipass": "^7.0.3" - }, - "engines": { - "node": ">=16 || 14 >=14.17" - } - }, - "node_modules/minipass-fetch": { - "version": "4.0.1", - "resolved": "https://registry.npmjs.org/minipass-fetch/-/minipass-fetch-4.0.1.tgz", - "integrity": "sha512-j7U11C5HXigVuutxebFadoYBbd7VSdZWggSe64NVdvWNBqGAiXPL2QVCehjmw7lY1oF9gOllYbORh+hiNgfPgQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "minipass": "^7.0.3", - "minipass-sized": "^1.0.3", - "minizlib": "^3.0.1" - }, - "engines": { - "node": "^18.17.0 || >=20.5.0" - }, - "optionalDependencies": { - "encoding": "^0.1.13" - } - }, - "node_modules/minipass-flush": { - "version": "1.0.5", - "resolved": "https://registry.npmjs.org/minipass-flush/-/minipass-flush-1.0.5.tgz", - "integrity": "sha512-JmQSYYpPUqX5Jyn1mXaRwOda1uQ8HP5KAT/oDSLCzt1BYRhQU0/hDtsB1ufZfEEzMZ9aAVmsBw8+FWsIXlClWw==", - "dev": true, - "license": "ISC", - "dependencies": { - "minipass": "^3.0.0" - }, - "engines": { - "node": ">= 8" - } - }, - "node_modules/minipass-flush/node_modules/minipass": { - "version": "3.3.6", - 
"resolved": "https://registry.npmjs.org/minipass/-/minipass-3.3.6.tgz", - "integrity": "sha512-DxiNidxSEK+tHG6zOIklvNOwm3hvCrbUrdtzY74U6HKTJxvIDfOUL5W5P2Ghd3DTkhhKPYGqeNUIh5qcM4YBfw==", - "dev": true, - "license": "ISC", - "dependencies": { - "yallist": "^4.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/minipass-pipeline": { - "version": "1.2.4", - "resolved": "https://registry.npmjs.org/minipass-pipeline/-/minipass-pipeline-1.2.4.tgz", - "integrity": "sha512-xuIq7cIOt09RPRJ19gdi4b+RiNvDFYe5JH+ggNvBqGqpQXcru3PcRmOZuHBKWK1Txf9+cQ+HMVN4d6z46LZP7A==", - "dev": true, - "license": "ISC", - "dependencies": { - "minipass": "^3.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/minipass-pipeline/node_modules/minipass": { - "version": "3.3.6", - "resolved": "https://registry.npmjs.org/minipass/-/minipass-3.3.6.tgz", - "integrity": "sha512-DxiNidxSEK+tHG6zOIklvNOwm3hvCrbUrdtzY74U6HKTJxvIDfOUL5W5P2Ghd3DTkhhKPYGqeNUIh5qcM4YBfw==", - "dev": true, - "license": "ISC", - "dependencies": { - "yallist": "^4.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/minipass-sized": { - "version": "1.0.3", - "resolved": "https://registry.npmjs.org/minipass-sized/-/minipass-sized-1.0.3.tgz", - "integrity": "sha512-MbkQQ2CTiBMlA2Dm/5cY+9SWFEN8pzzOXi6rlM5Xxq0Yqbda5ZQy9sU75a673FE9ZK0Zsbr6Y5iP6u9nktfg2g==", - "dev": true, - "license": "ISC", - "dependencies": { - "minipass": "^3.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/minipass-sized/node_modules/minipass": { - "version": "3.3.6", - "resolved": "https://registry.npmjs.org/minipass/-/minipass-3.3.6.tgz", - "integrity": "sha512-DxiNidxSEK+tHG6zOIklvNOwm3hvCrbUrdtzY74U6HKTJxvIDfOUL5W5P2Ghd3DTkhhKPYGqeNUIh5qcM4YBfw==", - "dev": true, - "license": "ISC", - "dependencies": { - "yallist": "^4.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/minizlib": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/minizlib/-/minizlib-3.0.2.tgz", - "integrity": 
"sha512-oG62iEk+CYt5Xj2YqI5Xi9xWUeZhDI8jjQmC5oThVH5JGCTgIjr7ciJDzC7MBzYd//WvR1OTmP5Q38Q8ShQtVA==", - "dev": true, - "license": "MIT", - "dependencies": { - "minipass": "^7.1.2" - }, - "engines": { - "node": ">= 18" - } - }, - "node_modules/mkdirp": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-3.0.1.tgz", - "integrity": "sha512-+NsyUUAZDmo6YVHzL/stxSu3t9YS1iljliy3BSDrXJ/dkn1KYdmtZODGGjLcc9XLgVVpH4KshHB8XmZgMhaBXg==", - "dev": true, - "license": "MIT", - "bin": { - "mkdirp": "dist/cjs/src/bin.js" - }, - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, - "node_modules/mkdirp-classic": { - "version": "0.5.3", - "resolved": "https://registry.npmjs.org/mkdirp-classic/-/mkdirp-classic-0.5.3.tgz", - "integrity": "sha512-gKLcREMhtuZRwRAfqP3RFW+TK4JqApVBtOIftVgjuABpAtpxhPGaDcfvbhNvD0B8iD1oUr/txX35NjcaY6Ns/A==", - "dev": true - }, - "node_modules/ms": { - "version": "2.1.3", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", - "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", - "dev": true, - "license": "MIT" - }, - "node_modules/negotiator": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/negotiator/-/negotiator-1.0.0.tgz", - "integrity": "sha512-8Ofs/AUQh8MaEcrlq5xOX0CQ9ypTF5dl78mjlMNfOK08fzpgTHQRQPBxcPlEtIw0yRpws+Zo/3r+5WRby7u3Gg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">= 0.6" - } - }, - "node_modules/node-abi": { - "version": "3.54.0", - "resolved": "https://registry.npmjs.org/node-abi/-/node-abi-3.54.0.tgz", - "integrity": "sha512-p7eGEiQil0YUV3ItH4/tBb781L5impVmmx2E9FRKF7d18XXzp4PGT2tdYMFY6wQqgxD0IwNZOiSJ0/K0fSi/OA==", - "dev": true, - "dependencies": { - "semver": "^7.3.5" - }, - "engines": { - "node": ">=10" - } - }, - "node_modules/node-addon-api": { - "version": "8.5.0", - "resolved": "https://registry.npmjs.org/node-addon-api/-/node-addon-api-8.5.0.tgz", - 
"integrity": "sha512-/bRZty2mXUIFY/xU5HLvveNHlswNJej+RnxBjOMkidWfwZzgTbPG1E3K5TOxRLOR+5hX7bSofy8yf1hZevMS8A==", - "license": "MIT", - "engines": { - "node": "^18 || ^20 || >= 21" - } - }, - "node_modules/node-gyp": { - "version": "11.4.2", - "resolved": "https://registry.npmjs.org/node-gyp/-/node-gyp-11.4.2.tgz", - "integrity": "sha512-3gD+6zsrLQH7DyYOUIutaauuXrcyxeTPyQuZQCQoNPZMHMMS5m4y0xclNpvYzoK3VNzuyxT6eF4mkIL4WSZ1eQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "env-paths": "^2.2.0", - "exponential-backoff": "^3.1.1", - "graceful-fs": "^4.2.6", - "make-fetch-happen": "^14.0.3", - "nopt": "^8.0.0", - "proc-log": "^5.0.0", - "semver": "^7.3.5", - "tar": "^7.4.3", - "tinyglobby": "^0.2.12", - "which": "^5.0.0" - }, - "bin": { - "node-gyp": "bin/node-gyp.js" - }, - "engines": { - "node": "^18.17.0 || >=20.5.0" - } - }, - "node_modules/node-gyp-build": { - "version": "4.8.4", - "resolved": "https://registry.npmjs.org/node-gyp-build/-/node-gyp-build-4.8.4.tgz", - "integrity": "sha512-LA4ZjwlnUblHVgq0oBF3Jl/6h/Nvs5fzBLwdEF4nuxnFdsfajde4WfxtJr3CaiH+F6ewcIB/q4jQ4UzPyid+CQ==", - "license": "MIT", - "bin": { - "node-gyp-build": "bin.js", - "node-gyp-build-optional": "optional.js", - "node-gyp-build-test": "build-test.js" - } - }, - "node_modules/nopt": { - "version": "8.1.0", - "resolved": "https://registry.npmjs.org/nopt/-/nopt-8.1.0.tgz", - "integrity": "sha512-ieGu42u/Qsa4TFktmaKEwM6MQH0pOWnaB3htzh0JRtx84+Mebc0cbZYN5bC+6WTZ4+77xrL9Pn5m7CV6VIkV7A==", - "dev": true, - "license": "ISC", - "dependencies": { - "abbrev": "^3.0.0" - }, - "bin": { - "nopt": "bin/nopt.js" - }, - "engines": { - "node": "^18.17.0 || >=20.5.0" - } - }, - "node_modules/npm-run-path": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/npm-run-path/-/npm-run-path-3.1.0.tgz", - "integrity": "sha512-Dbl4A/VfiVGLgQv29URL9xshU8XDY1GeLy+fsaZ1AA8JDSfjvr5P5+pzRbWqRSBxk6/DW7MIh8lTM/PaGnP2kg==", - "dev": true, - "dependencies": { - "path-key": "^3.0.0" - }, - "engines": { - 
"node": ">=8" - } - }, - "node_modules/once": { - "version": "1.4.0", - "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", - "integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==", - "dev": true, - "dependencies": { - "wrappy": "1" - } - }, - "node_modules/p-map": { - "version": "7.0.3", - "resolved": "https://registry.npmjs.org/p-map/-/p-map-7.0.3.tgz", - "integrity": "sha512-VkndIv2fIB99swvQoA65bm+fsmt6UNdGeIB0oxBs+WhAhdh08QA04JXpI7rbB9r08/nkbysKoya9rtDERYOYMA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/package-json-from-dist": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/package-json-from-dist/-/package-json-from-dist-1.0.1.tgz", - "integrity": "sha512-UEZIS3/by4OC8vL3P2dTXRETpebLI2NiI5vIrjaD/5UtrkFX/tNbwjTSRAGC/+7CAo2pIcBaRgWmcBBHcsaCIw==", - "dev": true, - "license": "BlueOak-1.0.0" - }, - "node_modules/path-key": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", - "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==", - "dev": true, - "engines": { - "node": ">=8" - } - }, - "node_modules/path-scurry": { - "version": "1.11.1", - "resolved": "https://registry.npmjs.org/path-scurry/-/path-scurry-1.11.1.tgz", - "integrity": "sha512-Xa4Nw17FS9ApQFJ9umLiJS4orGjm7ZzwUrwamcGQuHSzDyth9boKDaycYdDcZDuqYATXw4HFXgaqWTctW/v1HA==", - "dev": true, - "license": "BlueOak-1.0.0", - "dependencies": { - "lru-cache": "^10.2.0", - "minipass": "^5.0.0 || ^6.0.2 || ^7.0.0" - }, - "engines": { - "node": ">=16 || 14 >=14.18" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, - "node_modules/path-scurry/node_modules/lru-cache": { - "version": "10.4.3", - "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-10.4.3.tgz", - "integrity": 
"sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ==", - "dev": true, - "license": "ISC" - }, - "node_modules/picomatch": { - "version": "4.0.3", - "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", - "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/jonschlinkert" - } - }, - "node_modules/prebuildify": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/prebuildify/-/prebuildify-6.0.1.tgz", - "integrity": "sha512-8Y2oOOateom/s8dNBsGIcnm6AxPmLH4/nanQzL5lQMU+sC0CMhzARZHizwr36pUPLdvBnOkCNQzxg4djuFSgIw==", - "dev": true, - "license": "MIT", - "dependencies": { - "minimist": "^1.2.5", - "mkdirp-classic": "^0.5.3", - "node-abi": "^3.3.0", - "npm-run-path": "^3.1.0", - "pump": "^3.0.0", - "tar-fs": "^2.1.0" - }, - "bin": { - "prebuildify": "bin.js" - } - }, - "node_modules/proc-log": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/proc-log/-/proc-log-5.0.0.tgz", - "integrity": "sha512-Azwzvl90HaF0aCz1JrDdXQykFakSSNPaPoiZ9fm5qJIMHioDZEi7OAdRwSm6rSoPtY3Qutnm3L7ogmg3dc+wbQ==", - "dev": true, - "license": "ISC", - "engines": { - "node": "^18.17.0 || >=20.5.0" - } - }, - "node_modules/promise-retry": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/promise-retry/-/promise-retry-2.0.1.tgz", - "integrity": "sha512-y+WKFlBR8BGXnsNlIHFGPZmyDf3DFMoLhaflAnyZgV6rG6xu+JwesTo2Q9R6XwYmtmwAFCkAk3e35jEdoeh/3g==", - "dev": true, - "license": "MIT", - "dependencies": { - "err-code": "^2.0.2", - "retry": "^0.12.0" - }, - "engines": { - "node": ">=10" - } - }, - "node_modules/pump": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.0.tgz", - "integrity": "sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww==", - "dev": 
true, - "dependencies": { - "end-of-stream": "^1.1.0", - "once": "^1.3.1" - } - }, - "node_modules/readable-stream": { - "version": "3.6.2", - "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz", - "integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==", - "dev": true, - "dependencies": { - "inherits": "^2.0.3", - "string_decoder": "^1.1.1", - "util-deprecate": "^1.0.1" - }, - "engines": { - "node": ">= 6" - } - }, - "node_modules/retry": { - "version": "0.12.0", - "resolved": "https://registry.npmjs.org/retry/-/retry-0.12.0.tgz", - "integrity": "sha512-9LkiTwjUh6rT555DtE9rTX+BKByPfrMzEAtnlEtdEwr3Nkffwiihqe2bWADg+OQRjt9gl6ICdmB/ZFDCGAtSow==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">= 4" - } - }, - "node_modules/safe-buffer": { - "version": "5.2.1", - "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", - "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==", - "dev": true, - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ] - }, - "node_modules/safer-buffer": { - "version": "2.1.2", - "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", - "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==", - "dev": true, - "license": "MIT", - "optional": true - }, - "node_modules/semver": { - "version": "7.5.4", - "resolved": "https://registry.npmjs.org/semver/-/semver-7.5.4.tgz", - "integrity": "sha512-1bCSESV6Pv+i21Hvpxp3Dx+pSD8lIPt8uVjRrxAUt/nbswYc+tK6Y2btiULjd4+fnq15PX+nqQDC7Oft7WkwcA==", - "dev": true, - "dependencies": { - "lru-cache": "^6.0.0" - }, - "bin": { - "semver": "bin/semver.js" - }, - "engines": { - "node": 
">=10" - } - }, - "node_modules/shebang-command": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", - "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==", - "dev": true, - "license": "MIT", - "dependencies": { - "shebang-regex": "^3.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/shebang-regex": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz", - "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/signal-exit": { - "version": "4.1.0", - "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-4.1.0.tgz", - "integrity": "sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw==", - "dev": true, - "license": "ISC", - "engines": { - "node": ">=14" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" - } - }, - "node_modules/smart-buffer": { - "version": "4.2.0", - "resolved": "https://registry.npmjs.org/smart-buffer/-/smart-buffer-4.2.0.tgz", - "integrity": "sha512-94hK0Hh8rPqQl2xXc3HsaBoOXKV20MToPkcXvwbISWLEs+64sBq5kFgn2kJDHb1Pry9yrP0dxrCI9RRci7RXKg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">= 6.0.0", - "npm": ">= 3.0.0" - } - }, - "node_modules/socks": { - "version": "2.8.7", - "resolved": "https://registry.npmjs.org/socks/-/socks-2.8.7.tgz", - "integrity": "sha512-HLpt+uLy/pxB+bum/9DzAgiKS8CX1EvbWxI4zlmgGCExImLdiad2iCwXT5Z4c9c3Eq8rP2318mPW2c+QbtjK8A==", - "dev": true, - "license": "MIT", - "dependencies": { - "ip-address": "^10.0.1", - "smart-buffer": "^4.2.0" - }, - "engines": { - "node": ">= 10.0.0", - "npm": ">= 3.0.0" - } - }, - "node_modules/socks-proxy-agent": { - "version": "8.0.5", - "resolved": 
"https://registry.npmjs.org/socks-proxy-agent/-/socks-proxy-agent-8.0.5.tgz", - "integrity": "sha512-HehCEsotFqbPW9sJ8WVYB6UbmIMv7kUUORIF2Nncq4VQvBfNBLibW9YZR5dlYCSUhwcD628pRllm7n+E+YTzJw==", - "dev": true, - "license": "MIT", - "dependencies": { - "agent-base": "^7.1.2", - "debug": "^4.3.4", - "socks": "^2.8.3" - }, - "engines": { - "node": ">= 14" - } - }, - "node_modules/ssri": { - "version": "12.0.0", - "resolved": "https://registry.npmjs.org/ssri/-/ssri-12.0.0.tgz", - "integrity": "sha512-S7iGNosepx9RadX82oimUkvr0Ct7IjJbEbs4mJcTxst8um95J3sDYU1RBEOvdu6oL1Wek2ODI5i4MAw+dZ6cAQ==", - "dev": true, - "license": "ISC", - "dependencies": { - "minipass": "^7.0.3" - }, - "engines": { - "node": "^18.17.0 || >=20.5.0" - } - }, - "node_modules/string_decoder": { - "version": "1.3.0", - "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz", - "integrity": "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==", - "dev": true, - "dependencies": { - "safe-buffer": "~5.2.0" - } - }, - "node_modules/string-width": { - "version": "5.1.2", - "resolved": "https://registry.npmjs.org/string-width/-/string-width-5.1.2.tgz", - "integrity": "sha512-HnLOCR3vjcY8beoNLtcjZ5/nxn2afmME6lhrDrebokqMap+XbeW8n9TXpPDOqdGK5qcI3oT0GKTW6wC7EMiVqA==", - "dev": true, - "license": "MIT", - "dependencies": { - "eastasianwidth": "^0.2.0", - "emoji-regex": "^9.2.2", - "strip-ansi": "^7.0.1" - }, - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/string-width-cjs": { - "name": "string-width", - "version": "4.2.3", - "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", - "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", - "dev": true, - "license": "MIT", - "dependencies": { - "emoji-regex": "^8.0.0", - "is-fullwidth-code-point": "^3.0.0", - "strip-ansi": "^6.0.1" - }, - 
"engines": { - "node": ">=8" - } - }, - "node_modules/string-width-cjs/node_modules/ansi-regex": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", - "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/string-width-cjs/node_modules/emoji-regex": { - "version": "8.0.0", - "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", - "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", - "dev": true, - "license": "MIT" - }, - "node_modules/string-width-cjs/node_modules/strip-ansi": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", - "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", - "dev": true, - "license": "MIT", - "dependencies": { - "ansi-regex": "^5.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/strip-ansi": { - "version": "7.1.0", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-7.1.0.tgz", - "integrity": "sha512-iq6eVVI64nQQTRYq2KtEg2d2uU7LElhTJwsH4YzIHZshxlgZms/wIc4VoDQTlG/IvVIrBKG06CrZnp0qv7hkcQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "ansi-regex": "^6.0.1" - }, - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/chalk/strip-ansi?sponsor=1" - } - }, - "node_modules/strip-ansi-cjs": { - "name": "strip-ansi", - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", - "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", - "dev": true, - "license": "MIT", - "dependencies": { - "ansi-regex": "^5.0.1" - }, - "engines": { - "node": ">=8" - } - }, - 
"node_modules/strip-ansi-cjs/node_modules/ansi-regex": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", - "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/tar": { - "version": "7.4.3", - "resolved": "https://registry.npmjs.org/tar/-/tar-7.4.3.tgz", - "integrity": "sha512-5S7Va8hKfV7W5U6g3aYxXmlPoZVAwUMy9AOKyF2fVuZa2UD3qZjg578OrLRt8PcNN1PleVaL/5/yYATNL0ICUw==", - "dev": true, - "license": "ISC", - "dependencies": { - "@isaacs/fs-minipass": "^4.0.0", - "chownr": "^3.0.0", - "minipass": "^7.1.2", - "minizlib": "^3.0.1", - "mkdirp": "^3.0.1", - "yallist": "^5.0.0" - }, - "engines": { - "node": ">=18" - } - }, - "node_modules/tar-fs": { - "version": "2.1.3", - "resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-2.1.3.tgz", - "integrity": "sha512-090nwYJDmlhwFwEW3QQl+vaNnxsO2yVsd45eTKRBzSzu+hlb1w2K9inVq5b0ngXuLVqQ4ApvsUHHnu/zQNkWAg==", - "dev": true, - "license": "MIT", - "dependencies": { - "chownr": "^1.1.1", - "mkdirp-classic": "^0.5.2", - "pump": "^3.0.0", - "tar-stream": "^2.1.4" - } - }, - "node_modules/tar-stream": { - "version": "2.2.0", - "resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-2.2.0.tgz", - "integrity": "sha512-ujeqbceABgwMZxEJnk2HDY2DlnUZ+9oEcb1KzTVfYHio0UE6dG71n60d8D2I4qNvleWrrXpmjpt7vZeF1LnMZQ==", - "dev": true, - "dependencies": { - "bl": "^4.0.3", - "end-of-stream": "^1.4.1", - "fs-constants": "^1.0.0", - "inherits": "^2.0.3", - "readable-stream": "^3.1.1" - }, - "engines": { - "node": ">=6" - } - }, - "node_modules/tar/node_modules/chownr": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/chownr/-/chownr-3.0.0.tgz", - "integrity": "sha512-+IxzY9BZOQd/XuYPRmrvEVjF/nqj5kgT4kEq7VofrDoM1MxoRjEWkrCC3EtLi59TVawxTAn+orJwFQcrqEN1+g==", - "dev": true, - "license": "BlueOak-1.0.0", - "engines": { - "node": ">=18" - } 
- }, - "node_modules/tar/node_modules/yallist": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/yallist/-/yallist-5.0.0.tgz", - "integrity": "sha512-YgvUTfwqyc7UXVMrB+SImsVYSmTS8X/tSrtdNZMImM+n7+QTriRXyXim0mBrTXNeqzVF0KWGgHPeiyViFFrNDw==", - "dev": true, - "license": "BlueOak-1.0.0", - "engines": { - "node": ">=18" - } - }, - "node_modules/tinyglobby": { - "version": "0.2.14", - "resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.14.tgz", - "integrity": "sha512-tX5e7OM1HnYr2+a2C/4V0htOcSQcoSTH9KgJnVvNm5zm/cyEWKJ7j7YutsH9CxMdtOkkLFy2AHrMci9IM8IPZQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "fdir": "^6.4.4", - "picomatch": "^4.0.2" - }, - "engines": { - "node": ">=12.0.0" - }, - "funding": { - "url": "https://github.com/sponsors/SuperchupuDev" - } - }, - "node_modules/typescript": { - "version": "5.9.2", - "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.2.tgz", - "integrity": "sha512-CWBzXQrc/qOkhidw1OzBTQuYRbfyxDXJMVJ1XNwUHGROVmuaeiEm3OslpZ1RV96d7SKKjZKrSJu3+t/xlw3R9A==", - "dev": true, - "license": "Apache-2.0", - "bin": { - "tsc": "bin/tsc", - "tsserver": "bin/tsserver" - }, - "engines": { - "node": ">=14.17" - } - }, - "node_modules/undici-types": { - "version": "7.10.0", - "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.10.0.tgz", - "integrity": "sha512-t5Fy/nfn+14LuOc2KNYg75vZqClpAiqscVvMygNnlsHBFpSXdJaYtXMcdNLpl/Qvc3P2cB3s6lOV51nqsFq4ag==", - "dev": true, - "license": "MIT" - }, - "node_modules/unique-filename": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/unique-filename/-/unique-filename-4.0.0.tgz", - "integrity": "sha512-XSnEewXmQ+veP7xX2dS5Q4yZAvO40cBN2MWkJ7D/6sW4Dg6wYBNwM1Vrnz1FhH5AdeLIlUXRI9e28z1YZi71NQ==", - "dev": true, - "license": "ISC", - "dependencies": { - "unique-slug": "^5.0.0" - }, - "engines": { - "node": "^18.17.0 || >=20.5.0" - } - }, - "node_modules/unique-slug": { - "version": "5.0.0", - "resolved": 
"https://registry.npmjs.org/unique-slug/-/unique-slug-5.0.0.tgz", - "integrity": "sha512-9OdaqO5kwqR+1kVgHAhsp5vPNU0hnxRa26rBFNfNgM7M6pNtgzeBn3s/xbyCQL3dcjzOatcef6UUHpB/6MaETg==", - "dev": true, - "license": "ISC", - "dependencies": { - "imurmurhash": "^0.1.4" - }, - "engines": { - "node": "^18.17.0 || >=20.5.0" - } - }, - "node_modules/util-deprecate": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", - "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==", - "dev": true - }, - "node_modules/which": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/which/-/which-5.0.0.tgz", - "integrity": "sha512-JEdGzHwwkrbWoGOlIHqQ5gtprKGOenpDHpxE9zVR1bWbOtYRyPPHMe9FaP6x61CmNaTThSkb0DAJte5jD+DmzQ==", - "dev": true, - "license": "ISC", - "dependencies": { - "isexe": "^3.1.1" - }, - "bin": { - "node-which": "bin/which.js" - }, - "engines": { - "node": "^18.17.0 || >=20.5.0" - } - }, - "node_modules/wrap-ansi": { - "version": "8.1.0", - "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-8.1.0.tgz", - "integrity": "sha512-si7QWI6zUMq56bESFvagtmzMdGOtoxfR+Sez11Mobfc7tm+VkUckk9bW2UeffTGVUbOksxmSw0AA2gs8g71NCQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "ansi-styles": "^6.1.0", - "string-width": "^5.0.1", - "strip-ansi": "^7.0.1" - }, - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/chalk/wrap-ansi?sponsor=1" - } - }, - "node_modules/wrap-ansi-cjs": { - "name": "wrap-ansi", - "version": "7.0.0", - "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", - "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==", - "dev": true, - "license": "MIT", - "dependencies": { - "ansi-styles": "^4.0.0", - "string-width": "^4.1.0", - "strip-ansi": "^6.0.0" - }, - "engines": { - "node": ">=10" - }, - "funding": { - "url": 
"https://github.com/chalk/wrap-ansi?sponsor=1" - } - }, - "node_modules/wrap-ansi-cjs/node_modules/ansi-regex": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", - "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/wrap-ansi-cjs/node_modules/ansi-styles": { - "version": "4.3.0", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", - "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", - "dev": true, - "license": "MIT", - "dependencies": { - "color-convert": "^2.0.1" - }, - "engines": { - "node": ">=8" - }, - "funding": { - "url": "https://github.com/chalk/ansi-styles?sponsor=1" - } - }, - "node_modules/wrap-ansi-cjs/node_modules/emoji-regex": { - "version": "8.0.0", - "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", - "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", - "dev": true, - "license": "MIT" - }, - "node_modules/wrap-ansi-cjs/node_modules/string-width": { - "version": "4.2.3", - "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", - "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", - "dev": true, - "license": "MIT", - "dependencies": { - "emoji-regex": "^8.0.0", - "is-fullwidth-code-point": "^3.0.0", - "strip-ansi": "^6.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/wrap-ansi-cjs/node_modules/strip-ansi": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", - "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", - "dev": true, - "license": "MIT", - "dependencies": { - 
"ansi-regex": "^5.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/wrappy": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", - "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==", - "dev": true - }, - "node_modules/yallist": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz", - "integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==", - "dev": true - } - } -} diff --git a/package.json b/package.json index e0686ee5c..3512b582a 100644 --- a/package.json +++ b/package.json @@ -4,25 +4,24 @@ "description": "Smaller & Faster Single-File Vector Search Engine from Unum", "author": "Ash Vardanian (https://ashvardanian.com/)", "license": "Apache 2.0", - "homepage": "https://unum-cloud.github.io/usearch/", + "homepage": "https://unum-cloud.github.io/USearch/", "repository": { "type": "git", - "url": "https://github.com/unum-cloud/usearch.git" + "url": "https://github.com/unum-cloud/USearch.git" }, "bugs": { - "url": "https://github.com/unum-cloud/usearch/issues", + "url": "https://github.com/unum-cloud/USearch/issues", "email": "info@unum.cloud" }, "engines": { - "node": "~10 >=10.20 || >=12.17" + "node": ">=20" }, "files": [ "binding.gyp", "c", "cpp", "cmake", - "fp16", - "simsimd", + "numkong", "include", "javascript", "prebuilds" @@ -55,4 +54,4 @@ "require": "./javascript/dist/cjs/usearch.js" } } -} +} \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index bdbf2ad4f..7ef6a4971 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,34 +3,34 @@ # - `macos` wheels for x86_64, arm64, and universal2; # - `windows` wheels for AMD64, and ARM64. But not x86. # - `manylinux` and `musllinux` wheels for Linux on x86_64, aarch64. But not i686, ppc64le, s390x; -# * for CPython versions from 3.7 to 3.14. -# * for PyPy versions from 3.7 to 3.10. 
-# = meaning 7 platforms * 11 Python versions = 77 builds. +# * for CPython versions from 3.10 to 3.14 (including free-threaded 3.13t and 3.14t). +# = meaning 7 platforms * 7 Python versions = 49 builds. [build-system] +build-backend = "setuptools.build_meta" requires = [ - "setuptools>=42", - "wheel", - "cmake>=3.22", - "pybind11", - "numpy", - "simsimd>=6.0.5,<7.0.0", + "setuptools>=42", + "wheel", + "cmake>=3.22", + "pybind11>=2.13.6", + "numpy", + "numkong>=7.5.0", ] -build-backend = "setuptools.build_meta" [tool.pytest.ini_options] -minversion = "6.0" addopts = "-ra --showlocals --strict-markers --strict-config -s -x -p no:warnings" -xfail_strict = true filterwarnings = ["error"] +minversion = "6.0" +testpaths = ["python/scripts"] +xfail_strict = true # Avoid running tests, as everything is happening in a super slow container # We have already run all the relavent Python tests in `prerelease.yml` # test-requires = ["pytest", "numpy"] # test-command = "pytest {project}/python/scripts" [tool.cibuildwheel] -test-requires = [] -test-command = "" build-verbosity = 0 +test-command = "" +test-requires = [] # Add "pp*" to skip PyPy builds, but they should work fine these days :) # https://cibuildwheel.readthedocs.io/en/stable/options/#build-skip @@ -40,9 +40,9 @@ skip = [] [tool.cibuildwheel.linux] archs = ["x86_64", "aarch64"] before-build = [ - "rm -rf {project}/CMakeCache.txt {project}/build {project}/build_debug {project}/CMakeFiles.txt {project}/_deps {project}/.pytest_cache", - "mkdir -p build/usearch", - "git submodule update --init --recursive", + "rm -rf {project}/CMakeCache.txt {project}/build {project}/build_debug {project}/CMakeFiles.txt {project}/_deps {project}/.pytest_cache", + "mkdir -p build/usearch", + "git submodule update --init --recursive", ] repair-wheel-command = "auditwheel repair --lib-sdir . -w {dest_dir} {wheel}" @@ -51,43 +51,75 @@ repair-wheel-command = "auditwheel repair --lib-sdir . 
-w {dest_dir} {wheel}" # You can keep track of the most recent images on Quay: # - for `manylinux`: https://quay.io/search?q=manylinux # - for `musllinux`: https://quay.io/search?q=musllinux -manylinux-x86_64-image = "manylinux_2_28" manylinux-aarch64-image = "manylinux_2_28" -musllinux-x86_64-image = "musllinux_1_2" +manylinux-x86_64-image = "manylinux_2_28" musllinux-aarch64-image = "musllinux_1_2" +musllinux-x86_64-image = "musllinux_1_2" # On CentOS we have to use `yum`. # The healthy version would be: `apt-get update && apt-get install -y libc6-dev wget python3-dev`. -before-all = ["yum update -y && yum install -y glibc-devel wget python3-devel"] +before-all = [ + "yum update -y --disablerepo=extras --setopt=install_weak_deps=0 && yum install -y --disablerepo=extras glibc-devel wget python3-devel", +] # With `musl` builds, we obviously don't need the `glibc` and can't use `yum`. # This may also be handy for using custom dependencies for different Python versions: # https://cibuildwheel.readthedocs.io/en/stable/options/#overrides [[tool.cibuildwheel.overrides]] -select = "*-musllinux*" before-all = "apk add --update wget python3-dev" +select = "*-musllinux*" [tool.cibuildwheel.macos] archs = ["x86_64", "universal2", "arm64"] before-build = [ - "rm -rf {project}/CMakeCache.txt {project}/build {project}/build_debug {project}/CMakeFiles.txt {project}/_deps {project}/.pytest_cache", - "mkdir -p build/usearch", - "git submodule update --init --recursive", - "export CC=$(echo CXX)", + "rm -rf {project}/CMakeCache.txt {project}/build {project}/build_debug {project}/CMakeFiles.txt {project}/_deps {project}/.pytest_cache", + "mkdir -p build/usearch", + "git submodule update --init --recursive", + "export CC=$(echo CXX)", ] repair-wheel-command = "delocate-wheel --require-archs {delocate_archs} -w {dest_dir} -v {wheel}" - [tool.cibuildwheel.windows] archs = ["AMD64", "ARM64"] before-build = [ - "rd /s /q {project}\\CMakeCache.txt {project}\\build {project}\\build_debug 
{project}\\CMakeFiles.txt {project}\\_deps {project}\\.pytest_cache || echo Done", - "md build\\usearch", - "git submodule update --init --recursive", + "rd /s /q {project}\\CMakeCache.txt {project}\\build {project}\\build_debug {project}\\CMakeFiles.txt {project}\\_deps {project}\\.pytest_cache || echo Done", + "md build\\usearch", + "git submodule update --init --recursive", +] + +[dependency-groups] +lint = [ + "ruff>=0.15.8", +] +tests = [ + "numpy>=1.21", + "pytest>=9.0.2", + "pytest-repeat>=0.9.4", ] -# Configuration options for the Black formatter: -# https://black.readthedocs.io/en/latest/usage_and_configuration/the_basics.html#where-black-looks-for-the-file -[tool.black] -line-length = 120 # Set line length to the same value as in `.clang-format` for modern wide screens -target-version = ['py36', 'py314'] +[tool.ruff] +line-length = 120 +target-version = "py310" + +[tool.ruff.format] +quote-style = "double" + +[tool.ruff.lint] +select = [ + "E", # pycodestyle errors + "W", # pycodestyle warnings + "F", # pyflakes + "I", # isort (import ordering) + "UP", # pyupgrade (modernize syntax) +] +ignore = [ + "E501", # line too long (handled by formatter) + "E741", # ambiguous variable names (l, O, I) + "UP007", # Union[X, Y] -> X | Y (deferred to v3, requires annotation refactor) +] + +[tool.ruff.lint.per-file-ignores] +"python/usearch/__init__.py" = ["F401", "E402"] # re-exports and conditional imports + +[tool.ruff.lint.isort] +known-first-party = ["usearch"] diff --git a/python/lib.cpp b/python/lib.cpp index a14435073..79ca87762 100644 --- a/python/lib.cpp +++ b/python/lib.cpp @@ -215,7 +215,8 @@ template static void add_many_to_index( // index_at& index, py::buffer keys, py::buffer vectors, // bool force_copy, std::size_t threads, // - progress_func_t const& progress) { + progress_func_t const& progress, // + scalar_kind_t scalar_kind = scalar_kind_t::unknown_k) { py::buffer_info keys_info = keys.request(); py::buffer_info vectors_info = vectors.request(); @@ 
-247,12 +248,21 @@ static void add_many_to_index( // throw std::invalid_argument("Out of memory!"); // clang-format off - switch (numpy_string_to_kind(vectors_info.format)) { - case scalar_kind_t::b1x8_k: add_typed_to_index(index, keys_info, vectors_info, force_copy, threads, progress); break; - case scalar_kind_t::i8_k: add_typed_to_index(index, keys_info, vectors_info, force_copy, threads, progress); break; - case scalar_kind_t::f16_k: add_typed_to_index(index, keys_info, vectors_info, force_copy, threads, progress); break; - case scalar_kind_t::f32_k: add_typed_to_index(index, keys_info, vectors_info, force_copy, threads, progress); break; + scalar_kind_t kind = (scalar_kind != scalar_kind_t::unknown_k) + ? scalar_kind + : numpy_string_to_kind(vectors_info.format); + switch (kind) { case scalar_kind_t::f64_k: add_typed_to_index(index, keys_info, vectors_info, force_copy, threads, progress); break; + case scalar_kind_t::f32_k: add_typed_to_index(index, keys_info, vectors_info, force_copy, threads, progress); break; + case scalar_kind_t::bf16_k: add_typed_to_index(index, keys_info, vectors_info, force_copy, threads, progress); break; + case scalar_kind_t::f16_k: add_typed_to_index(index, keys_info, vectors_info, force_copy, threads, progress); break; + case scalar_kind_t::e5m2_k: add_typed_to_index(index, keys_info, vectors_info, force_copy, threads, progress); break; + case scalar_kind_t::e4m3_k: add_typed_to_index(index, keys_info, vectors_info, force_copy, threads, progress); break; + case scalar_kind_t::e3m2_k: add_typed_to_index(index, keys_info, vectors_info, force_copy, threads, progress); break; + case scalar_kind_t::e2m3_k: add_typed_to_index(index, keys_info, vectors_info, force_copy, threads, progress); break; + case scalar_kind_t::i8_k: add_typed_to_index(index, keys_info, vectors_info, force_copy, threads, progress); break; + case scalar_kind_t::u8_k: add_typed_to_index(index, keys_info, vectors_info, force_copy, threads, progress); break; + case 
scalar_kind_t::b1x8_k: add_typed_to_index(index, keys_info, vectors_info, force_copy, threads, progress); break; default: throw std::invalid_argument("Incompatible scalars in the vectors matrix: " + vectors_info.format); } // clang-format on @@ -415,7 +425,7 @@ static void search_typed( // template static py::tuple search_many_in_index( // index_at& index, py::buffer vectors, std::size_t wanted, bool exact, std::size_t threads, - progress_func_t const& progress) { + progress_func_t const& progress, scalar_kind_t scalar_kind = scalar_kind_t::unknown_k) { if (wanted == 0) return py::tuple(5); @@ -441,12 +451,21 @@ static py::tuple search_many_in_index( // std::atomic stats_computed_distances(0); // clang-format off - switch (numpy_string_to_kind(vectors_info.format)) { - case scalar_kind_t::b1x8_k: search_typed(index, vectors_info, wanted, exact, threads, keys_py, distances_py, counts_py, stats_visited_members, stats_computed_distances, progress); break; - case scalar_kind_t::i8_k: search_typed(index, vectors_info, wanted, exact, threads, keys_py, distances_py, counts_py, stats_visited_members, stats_computed_distances, progress); break; - case scalar_kind_t::f16_k: search_typed(index, vectors_info, wanted, exact, threads, keys_py, distances_py, counts_py, stats_visited_members, stats_computed_distances, progress); break; - case scalar_kind_t::f32_k: search_typed(index, vectors_info, wanted, exact, threads, keys_py, distances_py, counts_py, stats_visited_members, stats_computed_distances, progress); break; + scalar_kind_t kind = (scalar_kind != scalar_kind_t::unknown_k) + ? 
scalar_kind + : numpy_string_to_kind(vectors_info.format); + switch (kind) { case scalar_kind_t::f64_k: search_typed(index, vectors_info, wanted, exact, threads, keys_py, distances_py, counts_py, stats_visited_members, stats_computed_distances, progress); break; + case scalar_kind_t::f32_k: search_typed(index, vectors_info, wanted, exact, threads, keys_py, distances_py, counts_py, stats_visited_members, stats_computed_distances, progress); break; + case scalar_kind_t::bf16_k: search_typed(index, vectors_info, wanted, exact, threads, keys_py, distances_py, counts_py, stats_visited_members, stats_computed_distances, progress); break; + case scalar_kind_t::f16_k: search_typed(index, vectors_info, wanted, exact, threads, keys_py, distances_py, counts_py, stats_visited_members, stats_computed_distances, progress); break; + case scalar_kind_t::e5m2_k: search_typed(index, vectors_info, wanted, exact, threads, keys_py, distances_py, counts_py, stats_visited_members, stats_computed_distances, progress); break; + case scalar_kind_t::e4m3_k: search_typed(index, vectors_info, wanted, exact, threads, keys_py, distances_py, counts_py, stats_visited_members, stats_computed_distances, progress); break; + case scalar_kind_t::e3m2_k: search_typed(index, vectors_info, wanted, exact, threads, keys_py, distances_py, counts_py, stats_visited_members, stats_computed_distances, progress); break; + case scalar_kind_t::e2m3_k: search_typed(index, vectors_info, wanted, exact, threads, keys_py, distances_py, counts_py, stats_visited_members, stats_computed_distances, progress); break; + case scalar_kind_t::i8_k: search_typed(index, vectors_info, wanted, exact, threads, keys_py, distances_py, counts_py, stats_visited_members, stats_computed_distances, progress); break; + case scalar_kind_t::u8_k: search_typed(index, vectors_info, wanted, exact, threads, keys_py, distances_py, counts_py, stats_visited_members, stats_computed_distances, progress); break; + case scalar_kind_t::b1x8_k: 
search_typed(index, vectors_info, wanted, exact, threads, keys_py, distances_py, counts_py, stats_visited_members, stats_computed_distances, progress); break; default: throw std::invalid_argument("Incompatible scalars in the query matrix: " + vectors_info.format); } // clang-format on @@ -470,7 +489,8 @@ static py::tuple search_many_brute_force( // metric_kind_t metric_kind, // metric_punned_signature_t metric_signature, // std::uintptr_t metric_uintptr, // - progress_func_t const& progress_func) { + progress_func_t const& progress_func, // + scalar_kind_t scalar_kind = scalar_kind_t::unknown_k) { if (wanted == 0) return py::tuple(5); @@ -496,8 +516,12 @@ static py::tuple search_many_brute_force( // if (wanted > dataset_count) throw std::invalid_argument("You can't request more matches than in the dataset!"); - scalar_kind_t dataset_kind = numpy_string_to_kind(dataset_info.format); - scalar_kind_t queries_kind = numpy_string_to_kind(queries_info.format); + scalar_kind_t dataset_kind = (scalar_kind != scalar_kind_t::unknown_k) + ? scalar_kind + : numpy_string_to_kind(dataset_info.format); + scalar_kind_t queries_kind = (scalar_kind != scalar_kind_t::unknown_k) + ? 
scalar_kind + : numpy_string_to_kind(queries_info.format); if (dataset_kind != queries_kind) throw std::invalid_argument("The types of vectors don't match!"); @@ -661,7 +685,8 @@ template struct rows_lookup_gt { template static py::tuple cluster_vectors( // index_at& index, py::buffer queries, // - std::size_t min_count, std::size_t max_count, std::size_t threads, progress_func_t const& progress) { + std::size_t min_count, std::size_t max_count, std::size_t threads, progress_func_t const& progress, + scalar_kind_t scalar_kind = scalar_kind_t::unknown_k) { // Clamp threads to hardware limit instead of throwing threads = std::min(threads, std::thread::hardware_concurrency()); @@ -694,12 +719,21 @@ static py::tuple cluster_vectors( // rows_lookup_gt queries_end = queries_begin + queries_count; // clang-format off - switch (numpy_string_to_kind(queries_info.format)) { - case scalar_kind_t::b1x8_k: cluster_result = index.cluster(queries_begin.as(), queries_end.as(), config, keys_ptr, distances_ptr, executor, progress_t{progress}); break; - case scalar_kind_t::i8_k: cluster_result = index.cluster(queries_begin.as(), queries_end.as(), config, keys_ptr, distances_ptr, executor, progress_t{progress}); break; - case scalar_kind_t::f16_k: cluster_result = index.cluster(queries_begin.as(), queries_end.as(), config, keys_ptr, distances_ptr, executor, progress_t{progress}); break; - case scalar_kind_t::f32_k: cluster_result = index.cluster(queries_begin.as(), queries_end.as(), config, keys_ptr, distances_ptr, executor, progress_t{progress}); break; + scalar_kind_t kind = (scalar_kind != scalar_kind_t::unknown_k) + ? 
scalar_kind + : numpy_string_to_kind(queries_info.format); + switch (kind) { case scalar_kind_t::f64_k: cluster_result = index.cluster(queries_begin.as(), queries_end.as(), config, keys_ptr, distances_ptr, executor, progress_t{progress}); break; + case scalar_kind_t::f32_k: cluster_result = index.cluster(queries_begin.as(), queries_end.as(), config, keys_ptr, distances_ptr, executor, progress_t{progress}); break; + case scalar_kind_t::bf16_k: cluster_result = index.cluster(queries_begin.as(), queries_end.as(), config, keys_ptr, distances_ptr, executor, progress_t{progress}); break; + case scalar_kind_t::f16_k: cluster_result = index.cluster(queries_begin.as(), queries_end.as(), config, keys_ptr, distances_ptr, executor, progress_t{progress}); break; + case scalar_kind_t::e5m2_k: cluster_result = index.cluster(queries_begin.as(), queries_end.as(), config, keys_ptr, distances_ptr, executor, progress_t{progress}); break; + case scalar_kind_t::e4m3_k: cluster_result = index.cluster(queries_begin.as(), queries_end.as(), config, keys_ptr, distances_ptr, executor, progress_t{progress}); break; + case scalar_kind_t::e3m2_k: cluster_result = index.cluster(queries_begin.as(), queries_end.as(), config, keys_ptr, distances_ptr, executor, progress_t{progress}); break; + case scalar_kind_t::e2m3_k: cluster_result = index.cluster(queries_begin.as(), queries_end.as(), config, keys_ptr, distances_ptr, executor, progress_t{progress}); break; + case scalar_kind_t::i8_k: cluster_result = index.cluster(queries_begin.as(), queries_end.as(), config, keys_ptr, distances_ptr, executor, progress_t{progress}); break; + case scalar_kind_t::u8_k: cluster_result = index.cluster(queries_begin.as(), queries_end.as(), config, keys_ptr, distances_ptr, executor, progress_t{progress}); break; + case scalar_kind_t::b1x8_k: cluster_result = index.cluster(queries_begin.as(), queries_end.as(), config, keys_ptr, distances_ptr, executor, progress_t{progress}); break; default: throw 
std::invalid_argument("Incompatible scalars in the query matrix: " + queries_info.format); } // clang-format on @@ -969,21 +1003,33 @@ static py::object get_typed_vectors_for_keys(index_at const& index, py::buffer k } template py::object get_many(index_at const& index, py::buffer keys, scalar_kind_t scalar_kind) { - if (scalar_kind == scalar_kind_t::f32_k) - return get_typed_vectors_for_keys(index, keys); - else if (scalar_kind == scalar_kind_t::f64_k) + if (scalar_kind == scalar_kind_t::f64_k) return get_typed_vectors_for_keys(index, keys); + else if (scalar_kind == scalar_kind_t::f32_k) + return get_typed_vectors_for_keys(index, keys); else if (scalar_kind == scalar_kind_t::f16_k) return get_typed_vectors_for_keys(index, keys); else if (scalar_kind == scalar_kind_t::i8_k) return get_typed_vectors_for_keys(index, keys); + else if (scalar_kind == scalar_kind_t::u8_k) + return get_typed_vectors_for_keys(index, keys); + else if (scalar_kind == scalar_kind_t::bf16_k) + return get_typed_vectors_for_keys(index, keys); + else if (scalar_kind == scalar_kind_t::e5m2_k) + return get_typed_vectors_for_keys(index, keys); + else if (scalar_kind == scalar_kind_t::e4m3_k) + return get_typed_vectors_for_keys(index, keys); + else if (scalar_kind == scalar_kind_t::e3m2_k) + return get_typed_vectors_for_keys(index, keys); + else if (scalar_kind == scalar_kind_t::e2m3_k) + return get_typed_vectors_for_keys(index, keys); else if (scalar_kind == scalar_kind_t::b1x8_k) return get_typed_vectors_for_keys(index, keys); else throw std::invalid_argument("Incompatible scalars in the query matrix!"); } -PYBIND11_MODULE(compiled, m) { +PYBIND11_MODULE(compiled, m, py::mod_gil_not_used()) { m.doc() = "Smaller & Faster Single-File Vector Search Engine from Unum"; m.attr("DEFAULT_CONNECTIVITY") = py::int_(default_connectivity()); @@ -991,12 +1037,14 @@ PYBIND11_MODULE(compiled, m) { m.attr("DEFAULT_EXPANSION_SEARCH") = py::int_(default_expansion_search()); m.attr("USES_OPENMP") = 
py::int_(USEARCH_USE_OPENMP); - m.attr("USES_FP16LIB") = py::int_(USEARCH_USE_FP16LIB); - m.attr("USES_SIMSIMD") = py::int_(USEARCH_USE_SIMSIMD); -#if USEARCH_USE_SIMSIMD - m.attr("USES_SIMSIMD_DYNAMIC_DISPATCH") = py::int_(simsimd_uses_dynamic_dispatch()); + m.attr("USES_NUMKONG") = py::int_(USEARCH_USE_NUMKONG); + m.attr("USES_SIMSIMD") = py::int_(USEARCH_USE_NUMKONG); // backwards compatibility +#if USEARCH_USE_NUMKONG + m.attr("USES_NUMKONG_DYNAMIC_DISPATCH") = py::int_(nk_uses_dynamic_dispatch()); + m.attr("USES_SIMSIMD_DYNAMIC_DISPATCH") = py::int_(nk_uses_dynamic_dispatch()); // backwards compatibility #else - m.attr("USES_SIMSIMD_DYNAMIC_DISPATCH") = py::int_(0); + m.attr("USES_NUMKONG_DYNAMIC_DISPATCH") = py::int_(0); + m.attr("USES_SIMSIMD_DYNAMIC_DISPATCH") = py::int_(0); // backwards compatibility #endif m.attr("VERSION_MAJOR") = py::int_(USEARCH_VERSION_MAJOR); @@ -1027,22 +1075,25 @@ PYBIND11_MODULE(compiled, m) { py::enum_(m, "ScalarKind", py::arithmetic()) .value("Unknown", scalar_kind_t::unknown_k) - .value("B1", scalar_kind_t::b1x8_k) - .value("U40", scalar_kind_t::u40_k) - .value("UUID", scalar_kind_t::uuid_k) - .value("BF16", scalar_kind_t::bf16_k) .value("F64", scalar_kind_t::f64_k) .value("F32", scalar_kind_t::f32_k) + .value("BF16", scalar_kind_t::bf16_k) .value("F16", scalar_kind_t::f16_k) - .value("F8", scalar_kind_t::f8_k) + .value("E5M2", scalar_kind_t::e5m2_k) + .value("E4M3", scalar_kind_t::e4m3_k) + .value("E3M2", scalar_kind_t::e3m2_k) + .value("E2M3", scalar_kind_t::e2m3_k) + .value("I8", scalar_kind_t::i8_k) + .value("U8", scalar_kind_t::u8_k) + .value("B1", scalar_kind_t::b1x8_k) + .value("U40", scalar_kind_t::u40_k) + .value("UUID", scalar_kind_t::uuid_k) .value("U64", scalar_kind_t::u64_k) .value("U32", scalar_kind_t::u32_k) .value("U16", scalar_kind_t::u16_k) - .value("U8", scalar_kind_t::u8_k) .value("I64", scalar_kind_t::i64_k) .value("I32", scalar_kind_t::i32_k) - .value("I16", scalar_kind_t::i16_k) - .value("I8", 
scalar_kind_t::i8_k); + .value("I16", scalar_kind_t::i16_k); m.def("index_dense_metadata_from_path", [](std::string const& path) -> py::dict { index_dense_metadata_result_t meta = index_dense_metadata_from_path(path.c_str()); @@ -1066,7 +1117,8 @@ PYBIND11_MODULE(compiled, m) { py::arg("metric_kind") = metric_kind_t::cos_k, // py::arg("metric_signature") = metric_punned_signature_t::array_array_k, // py::arg("metric_pointer") = 0, // - py::arg("progress") = nullptr // + py::arg("progress") = nullptr, // + py::arg("dtype") = scalar_kind_t::unknown_k // ); m.def( // @@ -1096,6 +1148,9 @@ PYBIND11_MODULE(compiled, m) { py::arg("metric_kind") = metric_kind_t::cos_k // ); + m.def("hardware_acceleration_compiled", &hardware_acceleration_compiled); + m.def("hardware_acceleration_available", &hardware_acceleration_available); + auto i = py::class_>(m, "Index"); i.def( // @@ -1120,7 +1175,8 @@ PYBIND11_MODULE(compiled, m) { py::kw_only(), // py::arg("copy") = true, // py::arg("threads") = 0, // - py::arg("progress") = nullptr // + py::arg("progress") = nullptr, // + py::arg("dtype") = scalar_kind_t::unknown_k // ); i.def( // @@ -1129,7 +1185,8 @@ PYBIND11_MODULE(compiled, m) { py::arg("count") = 10, // py::arg("exact") = false, // py::arg("threads") = 0, // - py::arg("progress") = nullptr // + py::arg("progress") = nullptr, // + py::arg("dtype") = scalar_kind_t::unknown_k // ); i.def( // @@ -1138,7 +1195,8 @@ PYBIND11_MODULE(compiled, m) { py::arg("min_count") = 0, // py::arg("max_count") = 0, // py::arg("threads") = 0, // - py::arg("progress") = nullptr // + py::arg("progress") = nullptr, // + py::arg("dtype") = scalar_kind_t::unknown_k // ); i.def( // @@ -1236,7 +1294,8 @@ PYBIND11_MODULE(compiled, m) { i.def_property_readonly( // "dtype", [](dense_index_py_t const& index) -> scalar_kind_t { return index.scalar_kind(); }); - i.def_property_readonly("serialized_length", &dense_index_py_t::serialized_length); + i.def_property_readonly("serialized_length", + 
[](dense_index_py_t const& self) -> std::size_t { return self.serialized_length({}); }); i.def_property_readonly("memory_usage", &dense_index_py_t::memory_usage); i.def_property("expansion_add", &dense_index_py_t::expansion_add, &dense_index_py_t::change_expansion_add); @@ -1384,6 +1443,7 @@ PYBIND11_MODULE(compiled, m) { py::arg("count") = 10, // py::arg("exact") = false, // py::arg("threads") = 0, // - py::arg("progress") = nullptr // + py::arg("progress") = nullptr, // + py::arg("dtype") = scalar_kind_t::unknown_k // ); } diff --git a/python/scripts/bench.ipynb b/python/scripts/bench.ipynb deleted file mode 100644 index 3892b57d5..000000000 --- a/python/scripts/bench.ipynb +++ /dev/null @@ -1,263 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "('skylake', 'skylake', 'sapphire', 'ice', 'ice')" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from usearch.index import Index\n", - "\n", - "f64_support = Index(ndim=256, metric=\"cos\", dtype=\"f64\").hardware_acceleration\n", - "f32_support = Index(ndim=256, metric=\"cos\", dtype=\"f32\").hardware_acceleration\n", - "f16_support = Index(ndim=256, metric=\"cos\", dtype=\"f16\").hardware_acceleration\n", - "i8_support = Index(ndim=256, metric=\"cos\", dtype=\"i8\").hardware_acceleration\n", - "b1_support = Index(ndim=256, metric=\"hamming\", dtype=\"b1\").hardware_acceleration\n", - "\n", - "f64_support, f32_support, f16_support, i8_support, b1_support" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: faiss-cpu in /home/ubuntu/miniconda3/lib/python3.11/site-packages (1.7.4)\n", - "Requirement already satisfied: numba in /home/ubuntu/miniconda3/lib/python3.11/site-packages (0.58.1)\n", - "Requirement already satisfied: 
pandas in /home/ubuntu/miniconda3/lib/python3.11/site-packages (2.2.1)\n", - "Requirement already satisfied: plotly in /home/ubuntu/miniconda3/lib/python3.11/site-packages (5.17.0)\n", - "Requirement already satisfied: nbformat in /home/ubuntu/miniconda3/lib/python3.11/site-packages (5.9.2)\n", - "Requirement already satisfied: jinja2 in /home/ubuntu/miniconda3/lib/python3.11/site-packages (3.1.2)\n", - "Requirement already satisfied: kaleido in /home/ubuntu/miniconda3/lib/python3.11/site-packages (0.2.1)\n", - "Requirement already satisfied: tqdm in /home/ubuntu/miniconda3/lib/python3.11/site-packages (4.66.2)\n", - "Requirement already satisfied: scikit-learn in /home/ubuntu/miniconda3/lib/python3.11/site-packages (1.3.1)\n", - "Requirement already satisfied: scipy in /home/ubuntu/miniconda3/lib/python3.11/site-packages (1.12.0)\n", - "Requirement already satisfied: matplotlib in /home/ubuntu/miniconda3/lib/python3.11/site-packages (3.8.0)\n", - "Requirement already satisfied: llvmlite<0.42,>=0.41.0dev0 in /home/ubuntu/miniconda3/lib/python3.11/site-packages (from numba) (0.41.1)\n", - "Requirement already satisfied: numpy<1.27,>=1.22 in /home/ubuntu/miniconda3/lib/python3.11/site-packages (from numba) (1.26.4)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /home/ubuntu/miniconda3/lib/python3.11/site-packages (from pandas) (2.9.0.post0)\n", - "Requirement already satisfied: pytz>=2020.1 in /home/ubuntu/miniconda3/lib/python3.11/site-packages (from pandas) (2024.1)\n", - "Requirement already satisfied: tzdata>=2022.7 in /home/ubuntu/miniconda3/lib/python3.11/site-packages (from pandas) (2024.1)\n", - "Requirement already satisfied: tenacity>=6.2.0 in /home/ubuntu/miniconda3/lib/python3.11/site-packages (from plotly) (8.2.3)\n", - "Requirement already satisfied: packaging in /home/ubuntu/miniconda3/lib/python3.11/site-packages (from plotly) (23.2)\n", - "Requirement already satisfied: fastjsonschema in 
/home/ubuntu/miniconda3/lib/python3.11/site-packages (from nbformat) (2.18.1)\n", - "Requirement already satisfied: jsonschema>=2.6 in /home/ubuntu/miniconda3/lib/python3.11/site-packages (from nbformat) (4.19.1)\n", - "Requirement already satisfied: jupyter-core in /home/ubuntu/miniconda3/lib/python3.11/site-packages (from nbformat) (5.4.0)\n", - "Requirement already satisfied: traitlets>=5.1 in /home/ubuntu/miniconda3/lib/python3.11/site-packages (from nbformat) (5.11.2)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /home/ubuntu/miniconda3/lib/python3.11/site-packages (from jinja2) (2.1.3)\n", - "Requirement already satisfied: joblib>=1.1.1 in /home/ubuntu/miniconda3/lib/python3.11/site-packages (from scikit-learn) (1.2.0)\n", - "Requirement already satisfied: threadpoolctl>=2.0.0 in /home/ubuntu/miniconda3/lib/python3.11/site-packages (from scikit-learn) (3.2.0)\n", - "Requirement already satisfied: contourpy>=1.0.1 in /home/ubuntu/miniconda3/lib/python3.11/site-packages (from matplotlib) (1.1.1)\n", - "Requirement already satisfied: cycler>=0.10 in /home/ubuntu/miniconda3/lib/python3.11/site-packages (from matplotlib) (0.12.1)\n", - "Requirement already satisfied: fonttools>=4.22.0 in /home/ubuntu/miniconda3/lib/python3.11/site-packages (from matplotlib) (4.43.1)\n", - "Requirement already satisfied: kiwisolver>=1.0.1 in /home/ubuntu/miniconda3/lib/python3.11/site-packages (from matplotlib) (1.4.5)\n", - "Requirement already satisfied: pillow>=6.2.0 in /home/ubuntu/miniconda3/lib/python3.11/site-packages (from matplotlib) (10.1.0)\n", - "Requirement already satisfied: pyparsing>=2.3.1 in /home/ubuntu/miniconda3/lib/python3.11/site-packages (from matplotlib) (3.1.1)\n", - "Requirement already satisfied: attrs>=22.2.0 in /home/ubuntu/miniconda3/lib/python3.11/site-packages (from jsonschema>=2.6->nbformat) (23.2.0)\n", - "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /home/ubuntu/miniconda3/lib/python3.11/site-packages (from 
jsonschema>=2.6->nbformat) (2023.7.1)\n", - "Requirement already satisfied: referencing>=0.28.4 in /home/ubuntu/miniconda3/lib/python3.11/site-packages (from jsonschema>=2.6->nbformat) (0.30.2)\n", - "Requirement already satisfied: rpds-py>=0.7.1 in /home/ubuntu/miniconda3/lib/python3.11/site-packages (from jsonschema>=2.6->nbformat) (0.10.6)\n", - "Requirement already satisfied: six>=1.5 in /home/ubuntu/miniconda3/lib/python3.11/site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\n", - "Requirement already satisfied: platformdirs>=2.5 in /home/ubuntu/miniconda3/lib/python3.11/site-packages (from jupyter-core->nbformat) (4.2.0)\n" - ] - } - ], - "source": [ - "!pip install faiss-cpu numba pandas plotly nbformat jinja2 kaleido tqdm scikit-learn scipy matplotlib" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "from bench import *\n", - "import plotly.graph_objects as go" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "def speed_chart(df, title):\n", - " subtitles = [\n", - " f'{n}, recall@1 ~{r*100:.2f}%'\n", - " for n, r in zip(df['names'], df['recall_at_one'])\n", - " ]\n", - " fig = go.Figure(\n", - " data=[\n", - " go.Bar(name='Add', x=subtitles, y=df['add_per_second']),\n", - " go.Bar(name='Search', x=subtitles, y=df['search_per_second']),\n", - " ])\n", - " fig.update_layout(\n", - " width=1000,\n", - " height=1000,\n", - " barmode='group',\n", - " title=dict(\n", - " text=title,\n", - " ),\n", - " legend=dict(\n", - " orientation='h',\n", - " entrywidth=70,\n", - " yanchor='bottom',\n", - " y=1.02,\n", - " xanchor='right',\n", - " x=1,\n", - " ),\n", - " )\n", - " fig.write_image(title + '.png')\n", - " return fig" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "from usearch.eval import Dataset\n", - "data = Dataset.build(\n", - " 
vectors='../../datasets/wiki_1M/base.1M.fbin',\n", - " queries='../../datasets/wiki_1M/query.public.100K.fbin',\n", - " neighbors='../../datasets/wiki_1M/groundtruth.public.100K.ibin',\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "ename": "RuntimeError", - "evalue": "Operation has been terminated", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[9], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m eval_default \u001b[38;5;241m=\u001b[39m Evaluation\u001b[38;5;241m.\u001b[39mfor_dataset(data)\n\u001b[0;32m----> 2\u001b[0m results_default \u001b[38;5;241m=\u001b[39m \u001b[43mbench_speed\u001b[49m\u001b[43m(\u001b[49m\u001b[43meval_default\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mjit\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 3\u001b[0m results_default\n", - "File \u001b[0;32m~/USearch/python/scripts/bench.py:76\u001b[0m, in \u001b[0;36mbench_speed\u001b[0;34m(eval, connectivity, expansion_add, expansion_search, jit, train)\u001b[0m\n\u001b[1;32m 73\u001b[0m \u001b[38;5;28;01mpass\u001b[39;00m\n\u001b[1;32m 75\u001b[0m \u001b[38;5;66;03m# Time to evaluate:\u001b[39;00m\n\u001b[0;32m---> 76\u001b[0m results \u001b[38;5;241m=\u001b[39m \u001b[43m[\u001b[49m\u001b[38;5;28;43meval\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mindex\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mindex\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mindexes\u001b[49m\u001b[43m]\u001b[49m\n\u001b[1;32m 77\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m pd\u001b[38;5;241m.\u001b[39mDataFrame(\n\u001b[1;32m 78\u001b[0m 
{\n\u001b[1;32m 79\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnames\u001b[39m\u001b[38;5;124m\"\u001b[39m: [i\u001b[38;5;241m.\u001b[39mpath \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m indexes],\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 83\u001b[0m }\n\u001b[1;32m 84\u001b[0m )\n", - "File \u001b[0;32m~/USearch/python/scripts/bench.py:76\u001b[0m, in \u001b[0;36m\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 73\u001b[0m \u001b[38;5;28;01mpass\u001b[39;00m\n\u001b[1;32m 75\u001b[0m \u001b[38;5;66;03m# Time to evaluate:\u001b[39;00m\n\u001b[0;32m---> 76\u001b[0m results \u001b[38;5;241m=\u001b[39m [\u001b[38;5;28;43meval\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mindex\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mfor\u001b[39;00m index \u001b[38;5;129;01min\u001b[39;00m indexes]\n\u001b[1;32m 77\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m pd\u001b[38;5;241m.\u001b[39mDataFrame(\n\u001b[1;32m 78\u001b[0m {\n\u001b[1;32m 79\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnames\u001b[39m\u001b[38;5;124m\"\u001b[39m: [i\u001b[38;5;241m.\u001b[39mpath \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m indexes],\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 83\u001b[0m }\n\u001b[1;32m 84\u001b[0m )\n", - "File \u001b[0;32m~/USearch/python/usearch/eval.py:448\u001b[0m, in \u001b[0;36mEvaluation.__call__\u001b[0;34m(self, index, post_clean)\u001b[0m\n\u001b[1;32m 446\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 447\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m task \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtasks:\n\u001b[0;32m--> 448\u001b[0m task_result \u001b[38;5;241m=\u001b[39m task_result \u001b[38;5;241m+\u001b[39m \u001b[43mtask\u001b[49m\u001b[43m(\u001b[49m\u001b[43mindex\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 449\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyboardInterrupt\u001b[39;00m:\n\u001b[1;32m 450\u001b[0m 
\u001b[38;5;28;01mpass\u001b[39;00m\n", - "File \u001b[0;32m~/USearch/python/usearch/eval.py:330\u001b[0m, in \u001b[0;36mAddTask.__call__\u001b[0;34m(self, index)\u001b[0m\n\u001b[1;32m 328\u001b[0m batch_size: \u001b[38;5;28mint\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mvectors\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m 329\u001b[0m old_size: \u001b[38;5;28mint\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlen\u001b[39m(index)\n\u001b[0;32m--> 330\u001b[0m dt, _ \u001b[38;5;241m=\u001b[39m \u001b[43mmeasure_seconds\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43;01mlambda\u001b[39;49;00m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mindex\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43madd\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mkeys\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvectors\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 332\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(index) \u001b[38;5;241m==\u001b[39m old_size \u001b[38;5;241m+\u001b[39m batch_size\n\u001b[1;32m 333\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m TaskResult(\n\u001b[1;32m 334\u001b[0m add_operations\u001b[38;5;241m=\u001b[39mbatch_size,\n\u001b[1;32m 335\u001b[0m add_per_second\u001b[38;5;241m=\u001b[39mbatch_size \u001b[38;5;241m/\u001b[39m dt,\n\u001b[1;32m 336\u001b[0m )\n", - "File \u001b[0;32m~/USearch/python/usearch/eval.py:140\u001b[0m, in \u001b[0;36mmeasure_seconds\u001b[0;34m(f)\u001b[0m\n\u001b[1;32m 132\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Simple function profiling decorator.\u001b[39;00m\n\u001b[1;32m 133\u001b[0m \n\u001b[1;32m 134\u001b[0m \u001b[38;5;124;03m:param f: Function to be profiled\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 
137\u001b[0m \u001b[38;5;124;03m:rtype: Tuple[float, Any]\u001b[39;00m\n\u001b[1;32m 138\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 139\u001b[0m a \u001b[38;5;241m=\u001b[39m time_ns()\n\u001b[0;32m--> 140\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mf\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 141\u001b[0m b \u001b[38;5;241m=\u001b[39m time_ns()\n\u001b[1;32m 142\u001b[0m c \u001b[38;5;241m=\u001b[39m b \u001b[38;5;241m-\u001b[39m a\n", - "File \u001b[0;32m~/USearch/python/usearch/eval.py:330\u001b[0m, in \u001b[0;36mAddTask.__call__..\u001b[0;34m()\u001b[0m\n\u001b[1;32m 328\u001b[0m batch_size: \u001b[38;5;28mint\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mvectors\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m 329\u001b[0m old_size: \u001b[38;5;28mint\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlen\u001b[39m(index)\n\u001b[0;32m--> 330\u001b[0m dt, _ \u001b[38;5;241m=\u001b[39m measure_seconds(\u001b[38;5;28;01mlambda\u001b[39;00m: \u001b[43mindex\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43madd\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mkeys\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvectors\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[1;32m 332\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(index) \u001b[38;5;241m==\u001b[39m old_size \u001b[38;5;241m+\u001b[39m batch_size\n\u001b[1;32m 333\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m TaskResult(\n\u001b[1;32m 334\u001b[0m add_operations\u001b[38;5;241m=\u001b[39mbatch_size,\n\u001b[1;32m 335\u001b[0m add_per_second\u001b[38;5;241m=\u001b[39mbatch_size \u001b[38;5;241m/\u001b[39m dt,\n\u001b[1;32m 336\u001b[0m )\n", - "File \u001b[0;32m~/USearch/python/usearch/index.py:659\u001b[0m, in 
\u001b[0;36mIndex.add\u001b[0;34m(self, keys, vectors, copy, threads, log, progress)\u001b[0m\n\u001b[1;32m 620\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21madd\u001b[39m(\n\u001b[1;32m 621\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 622\u001b[0m keys: KeyOrKeysLike,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 628\u001b[0m progress: Optional[ProgressCallback] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 629\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Union[\u001b[38;5;28mint\u001b[39m, np\u001b[38;5;241m.\u001b[39mndarray]:\n\u001b[1;32m 630\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Inserts one or move vectors into the index.\u001b[39;00m\n\u001b[1;32m 631\u001b[0m \n\u001b[1;32m 632\u001b[0m \u001b[38;5;124;03m For maximal performance the `keys` and `vectors`\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 657\u001b[0m \u001b[38;5;124;03m :type: Union[int, np.ndarray]\u001b[39;00m\n\u001b[1;32m 658\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 659\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_add_to_compiled\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 660\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_compiled\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 661\u001b[0m \u001b[43m \u001b[49m\u001b[43mkeys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkeys\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 662\u001b[0m \u001b[43m \u001b[49m\u001b[43mvectors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mvectors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 663\u001b[0m \u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 664\u001b[0m \u001b[43m \u001b[49m\u001b[43mthreads\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mthreads\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 
665\u001b[0m \u001b[43m \u001b[49m\u001b[43mlog\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlog\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 666\u001b[0m \u001b[43m \u001b[49m\u001b[43mprogress\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mprogress\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 667\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/USearch/python/usearch/index.py:266\u001b[0m, in \u001b[0;36m_add_to_compiled\u001b[0;34m(compiled, keys, vectors, copy, threads, log, progress)\u001b[0m\n\u001b[1;32m 264\u001b[0m pbar\u001b[38;5;241m.\u001b[39mclose()\n\u001b[1;32m 265\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 266\u001b[0m \u001b[43mcompiled\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43madd_many\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkeys\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mvectors\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mthreads\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mthreads\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mprogress\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mprogress\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 268\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m keys\n", - "\u001b[0;31mRuntimeError\u001b[0m: Operation has been terminated" - ] - } - ], - "source": [ - "eval_default = Evaluation.for_dataset(data)\n", - "results_default = bench_speed(eval_default)\n", - "results_default" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "speed_chart(results_default, 'USearch vs FAISS Speed')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "eval_sliced = Evaluation.for_dataset(data, batch_size=256)\n", - "results_sliced = bench_speed(eval_sliced, 
jit=True)\n", - "results_sliced" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "speed_chart(results_sliced, 'USearch vs FAISS Speed with 256 Batch Size')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "eval_clustered = Evaluation.for_dataset(data, clusters=10_000)\n", - "len(eval_clustered.tasks)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "results_clustered = bench_speed(eval_clustered)\n", - "results_clustered" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "speed_chart(results_clustered, 'USearch vs FAISS Speed on Clustered Data')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "usearch-test", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.5" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/python/scripts/bench.py b/python/scripts/bench.py deleted file mode 100644 index 705e91ab6..000000000 --- a/python/scripts/bench.py +++ /dev/null @@ -1,159 +0,0 @@ -#!/usr/bin/env -S uv run --quiet --script -""" -USearch Benchmarking Utility - -This script provides benchmarking functions for USearch approximate nearest neighbor -search performance evaluation across different configurations and data types. - -Usage: - uv run python/scripts/bench.py - -Dependencies listed in the script header for uv to resolve automatically. 
-""" -# /// script -# dependencies = [ -# "numpy", -# "pandas", -# "usearch", -# "tqdm" -# ] -# /// - -import itertools -from typing import List -from dataclasses import asdict - -import numpy as np -import pandas as pd - -import usearch -from usearch.index import Index, Key, MetricKind, ScalarKind -from usearch.numba import jit as njit -from usearch.eval import Evaluation, AddTask -from usearch.index import ( - DEFAULT_CONNECTIVITY, - DEFAULT_EXPANSION_ADD, - DEFAULT_EXPANSION_SEARCH, -) - - -def bench_speed( - eval: Evaluation, - connectivity: int = DEFAULT_CONNECTIVITY, - expansion_add: int = DEFAULT_EXPANSION_ADD, - expansion_search: int = DEFAULT_EXPANSION_SEARCH, - jit: bool = False, - train: bool = False, -) -> pd.DataFrame: - # Build various indexes: - indexes = [] - jit_options = [False, True] if jit else [False] - dtype_options = [ScalarKind.F32, ScalarKind.F16, ScalarKind.BF16, ScalarKind.I8] - for jit, dtype in itertools.product(jit_options, dtype_options): - metric = MetricKind.IP - if jit: - metric = njit(eval.ndim, metric, dtype) - index = Index( - ndim=eval.ndim, - metric=metric, - dtype=dtype, - expansion_add=expansion_add, - expansion_search=expansion_search, - connectivity=connectivity, - path="USearch" + ["", "+JIT"][jit] + ":" + str(dtype), - ) - - # Skip the cases, where JIT-ing is impossible - if jit and not index.jit: - continue - indexes.append(index) - - # Add FAISS indexes to the mix: - try: - from index_faiss import IndexFAISS, IndexQuantizedFAISS - - indexes.append( - IndexFAISS( - ndim=eval.ndim, - expansion_add=expansion_add, - expansion_search=expansion_search, - connectivity=connectivity, - path="FAISS:f32", - ) - ) - if train: - indexes.append( - IndexQuantizedFAISS( - train=eval.tasks[0].vectors, - expansion_add=expansion_add, - expansion_search=expansion_search, - connectivity=connectivity, - path="FAISS+IVFPQ:f32", - ) - ) - except (ImportError, ModuleNotFoundError): - pass - - # Time to evaluate: - results = [eval(index) for 
index in indexes] - return pd.DataFrame( - { - "names": [i.path for i in indexes], - "add_per_second": [x["add_per_second"] for x in results], - "search_per_second": [x["search_per_second"] for x in results], - "recall_at_one": [x["recall_at_one"] for x in results], - } - ) - - -def bench_params( - count: int = 1_000_000, - connectivities: int = range(10, 20), - dimensions: List[int] = [ - 2, - 3, - 4, - 8, - 16, - 32, - 96, - 100, - 256, - 384, - 512, - 768, - 1024, - 1536, - ], - expansion_add: int = DEFAULT_EXPANSION_ADD, - expansion_search: int = DEFAULT_EXPANSION_SEARCH, -) -> pd.DataFrame: - """Measures indexing speed for different dimensionality vectors. - - :param count: Number of vectors, defaults to 1_000_000 - :type count: int, optional - """ - - results = [] - for connectivity, ndim in itertools.product(connectivities, dimensions): - task = AddTask( - keys=np.arange(count, dtype=Key), - vectors=np.random.rand(count, ndim).astype(np.float32), - ) - index = Index( - ndim=ndim, - connectivity=connectivity, - expansion_add=expansion_add, - expansion_search=expansion_search, - ) - result = asdict(task(index)) - result["ndim"] = dimensions - result["connectivity"] = connectivity - results.append(result) - - # return self._execute_tasks( - # tasks, - # title='HNSW Indexing Speed vs Vector Dimensions', - # x='ndim', y='add_per_second', log_x=True, - # ) - return pd.DataFrame(results) diff --git a/python/scripts/bench_cluster.py b/python/scripts/bench_cluster.py index 657821814..6b912c0e6 100644 --- a/python/scripts/bench_cluster.py +++ b/python/scripts/bench_cluster.py @@ -2,154 +2,200 @@ """ USearch Clustering Benchmarking -Benchmarks clustering performance comparing USearch k-means implementation -against FAISS baseline with various datasets and configurations. +Benchmarks K-means clustering comparing NumPy, FAISS, and USearch. +Supports comma-separated --backend for comparing multiple backends. 
+ +Development setup (once): + uv venv && source .venv/bin/activate + uv pip install -e . --force-reinstall + uv pip install faiss-cpu # optional Usage: - uv run python/scripts/bench_cluster.py --help - uv run python/scripts/bench_cluster.py --data datasets/wiki_1M/base.1M.fbin - -Dependencies listed in the script header for uv to resolve automatically. + python python/scripts/bench_cluster.py --vectors data.fbin --clusters 10 --backend usearch + python python/scripts/bench_cluster.py --vectors data.fbin --clusters 10 --backend numpy,faiss,usearch """ # /// script # dependencies = [ # "numpy", -# "faiss-cpu", +# "faiss-cpu", # "usearch", # "tqdm" # ] # /// -import os + import argparse +from dataclasses import dataclass +from time import perf_counter -import numpy as np import faiss +import numpy as np from tqdm import tqdm import usearch from usearch.index import kmeans from usearch.io import load_matrix +SUPPORTED_BACKENDS = {"numpy", "faiss", "usearch"} -def evaluate_clustering_euclidean(X, labels, centroids): - """Evaluate clustering quality as average distance to centroids""" - distances = np.linalg.norm(X - centroids[labels], axis=1) - return np.mean(distances) +@dataclass +class ClusterBenchResult: + backend: str + duration_seconds: float + quality_euclidean: float + quality_cosine: float + cluster_sizes_mean: float + cluster_sizes_std: float -def evaluate_clustering_cosine(X, labels, centroids): - """Evaluate clustering quality as average cosine distance to centroids""" - # Normalize both data points and centroids - X_normalized = X / np.linalg.norm(X, axis=1, keepdims=True) - centroids_normalized = centroids / np.linalg.norm(centroids, axis=1, keepdims=True) +def _parse_csv(value: str) -> list[str]: + return [v.strip() for v in value.split(",") if v.strip()] - # Compute cosine similarity using dot product - cosine_similarities = np.sum(X_normalized * centroids_normalized[labels], axis=1) - # Convert cosine similarity to cosine distance - cosine_distances = 
1 - cosine_similarities +def _evaluate_euclidean(vectors, labels, centroids): + distances = np.linalg.norm(vectors - centroids[labels], axis=1) + return np.mean(distances) + - # Return the average cosine distance - return np.mean(cosine_distances) +def _evaluate_cosine(vectors, labels, centroids): + vectors_normalized = vectors / np.linalg.norm(vectors, axis=1, keepdims=True) + centroids_normalized = centroids / np.linalg.norm(centroids, axis=1, keepdims=True) + cosine_similarities = np.sum(vectors_normalized * centroids_normalized[labels], axis=1) + return np.mean(1 - cosine_similarities) -def numpy_initialize_centroids(X, k): - """Randomly choose k data points as initial centroids""" - indices = np.random.choice(X.shape[0], k, replace=False) - return X[indices] +def _cluster_numpy(vectors, clusters, max_iterations=100, tolerance=1e-4): + indices = np.random.choice(vectors.shape[0], clusters, replace=False) + centroids = vectors[indices] + + for _iteration in tqdm(range(max_iterations), desc="KMeans"): + distances = np.linalg.norm(vectors[:, np.newaxis] - centroids, axis=2) + labels = np.argmin(distances, axis=1) + new_centroids = np.array([vectors[labels == i].mean(axis=0) for i in range(clusters)]) + if np.linalg.norm(new_centroids - centroids) < tolerance: + break + centroids = new_centroids + return labels, centroids -def numpy_assign_clusters(X, centroids): - """Assign each data point to the nearest centroid (numpy NumPy implementation)""" - distances = np.linalg.norm(X[:, np.newaxis] - centroids, axis=2) - return np.argmin(distances, axis=1) +def _cluster_faiss(vectors, clusters, max_iterations=100): + dimensions = vectors.shape[1] + kmeans_index = faiss.Kmeans(dimensions, clusters, niter=max_iterations, verbose=False) + kmeans_index.train(vectors) + _, assignments = kmeans_index.index.search(vectors, 1) + return assignments.flatten(), kmeans_index.centroids -def numpy_update_centroids(X, labels, k): - """Compute new centroids as the mean of all data points 
assigned to each cluster""" - return np.array([X[labels == i].mean(axis=0) for i in range(k)]) +def _cluster_usearch(vectors, clusters, max_iterations=100): + assignments, _, centroids = kmeans(vectors, clusters, max_iterations=max_iterations) + return assignments, centroids -def cluster_with_numpy(X, k, max_iters=100, tol=1e-4): - centroids = numpy_initialize_centroids(X, k) - for i in tqdm(range(max_iters), desc="KMeans Iterations"): - labels = numpy_assign_clusters(X, centroids) - new_centroids = numpy_update_centroids(X, labels, k) +_BACKEND_RUNNERS = { + "numpy": _cluster_numpy, + "faiss": _cluster_faiss, + "usearch": _cluster_usearch, +} - if np.linalg.norm(new_centroids - centroids) < tol: - break - centroids = new_centroids +def _run_cluster(vectors, clusters, max_iterations, backend) -> ClusterBenchResult: + runner = _BACKEND_RUNNERS[backend] - return labels, centroids + start = perf_counter() + labels, centroids = runner(vectors, clusters, max_iterations=max_iterations) + elapsed = perf_counter() - start + quality_euclidean = _evaluate_euclidean(vectors, labels, centroids) + quality_cosine = _evaluate_cosine(vectors, labels, centroids) + sizes = np.unique(labels, return_counts=True)[1] -def cluster_with_faiss(X, k, max_iters=100): - # Docs: https://github.com/facebookresearch/faiss/wiki/Faiss-building-blocks:-clustering,-PCA,-quantization - # Header: https://github.com/facebookresearch/faiss/blob/main/faiss/Clustering.h - # Source: https://github.com/facebookresearch/faiss/blob/main/faiss/Clustering.cpp - verbose = False - d: int = X.shape[1] - kmeans = faiss.Kmeans(d, k, niter=max_iters, verbose=verbose) - kmeans.train(X) - D, I = kmeans.index.search(X, 1) - return I.flatten(), kmeans.centroids + return ClusterBenchResult( + backend=backend, + duration_seconds=elapsed, + quality_euclidean=quality_euclidean, + quality_cosine=quality_cosine, + cluster_sizes_mean=np.mean(sizes), + cluster_sizes_std=np.std(sizes), + ) -def cluster_with_usearch(X, k, 
max_iters=100): - assignments, _, centroids = kmeans(X, k, max_iterations=max_iters) - return assignments, centroids +def _print_results(results: list[ClusterBenchResult], max_iterations: int) -> None: + if not results: + return + header = ( + f"{'Backend':<10} {'Time':>10} {'Time/iter':>12} " + f"{'Quality (L2)':>14} {'Quality (cos)':>14} {'Cluster sizes':>18}" + ) + print(f"\n{header}") + print("-" * len(header)) + for result in results: + print( + f"{result.backend:<10} {result.duration_seconds:>9,.2f}s {result.duration_seconds / max_iterations:>11,.3f}s " + f"{result.quality_euclidean:>14.4f} {result.quality_cosine:>14.4f} " + f"{result.cluster_sizes_mean:>8.0f} ยฑ {result.cluster_sizes_std:<6.0f}" + ) def main(): - parser = argparse.ArgumentParser(description="Compare KMeans clustering algorithms") - parser.add_argument("--vectors", type=str, required=True, help="Path to binary matrix file") - parser.add_argument("-k", default=10, type=int, required=True, help="Number of centroids") - parser.add_argument("-i", default=100, type=int, help="Upper bound on number of iterations") - parser.add_argument("-n", type=int, help="Upper bound on number of points to use") + parser = argparse.ArgumentParser( + description="Benchmark K-means clustering algorithms", + epilog=( + "Comma-separated --backend supported for comparing multiple backends.\n" + "Examples:\n" + " bench_cluster.py --vectors data.fbin --clusters 10 --backend numpy,faiss,usearch" + ), + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument("--vectors", type=str, required=True, help="Path to binary matrix file (.fbin)") + parser.add_argument("-k", "--clusters", default=10, type=int, help="Number of clusters (default: 10)") + parser.add_argument("-i", "--iterations", default=100, type=int, help="Maximum iterations (default: 100)") + parser.add_argument("-n", "--count", type=int, help="Limit number of vectors to use") parser.add_argument( - "--method", - type=str, - 
choices=["numpy", "faiss", "usearch"], + "--backend", default="numpy", - help="Clustering backend", + help=f"Backend(s), comma-separated (default: numpy). Choices: {', '.join(sorted(SUPPORTED_BACKENDS))}", ) args = parser.parse_args() - max_iters = args.i - X = load_matrix(args.vectors, count_rows=args.n) - k = args.k - method = args.method - - time_before = os.times() - if method == "usearch": - labels, centroids = cluster_with_usearch(X, k, max_iters=max_iters) - elif method == "faiss": - labels, centroids = cluster_with_faiss(X, k, max_iters=max_iters) - else: - labels, centroids = cluster_with_numpy(X, k, max_iters=max_iters) - time_after = os.times() - time_duration = time_after[0] - time_before[0] - print(f"Time: {time_duration:.2f}s, {time_duration / max_iters:.2f}s per iteration") - - quality = evaluate_clustering_euclidean(X, labels, centroids) - quality_cosine = evaluate_clustering_cosine(X, labels, centroids) - print(f"Clustering quality (average distance to centroids): {quality:.4f}, cosine: {quality_cosine:.4f}") - - # Let's compare it to some random uniform assignment - random_labels = np.random.randint(0, k, size=X.shape[0]) - random_quality = evaluate_clustering_euclidean(X, random_labels, centroids) - random_quality_cosine = evaluate_clustering_cosine(X, random_labels, centroids) - print(f"... while random assignment quality: {random_quality:.4f}, cosine: {random_quality_cosine:.4f}") - - cluster_sizes = np.unique(labels, return_counts=True)[1] - cluster_sizes_mean = np.mean(cluster_sizes) - cluster_sizes_stddev = np.std(cluster_sizes) - print(f"Cluster sizes: {cluster_sizes_mean:.2f} ยฑ {cluster_sizes_stddev:.2f}") - print(cluster_sizes) + backends = _parse_csv(args.backend) + for backend in backends: + if backend not in SUPPORTED_BACKENDS: + parser.error(f"Unknown backend '{backend}'. 
Choices: {', '.join(sorted(SUPPORTED_BACKENDS))}") + + # Library versions + print(f"USearch v{usearch.VERSION_MAJOR}.{usearch.VERSION_MINOR}.{usearch.VERSION_PATCH}") + if "faiss" in backends: + print(f"FAISS v{faiss.__version__}") + + # Load data + vectors = load_matrix(args.vectors, count_rows=args.count) + print(f"Loaded: {vectors.shape[0]:,} x {vectors.shape[1]}, clusters={args.clusters}, iterations={args.iterations}") + + # Run benchmarks + results: list[ClusterBenchResult] = [] + for backend in backends: + print(f"\n--- {backend} ---") + try: + results.append(_run_cluster(vectors, args.clusters, args.iterations, backend)) + except KeyboardInterrupt: + print("\n\nInterrupted โ€” printing results collected so far.") + break + except Exception as e: + print(f" Failed: {e}") + + _print_results(results, args.iterations) + + # Random baseline comparison + if results: + random_labels = np.random.randint(0, args.clusters, size=vectors.shape[0]) + last_centroids_result = results[-1] + # Use last backend's centroids for random baseline + _, centroids = _BACKEND_RUNNERS[last_centroids_result.backend](vectors, args.clusters, max_iterations=1) + random_quality = _evaluate_euclidean(vectors, random_labels, centroids) + random_cosine = _evaluate_cosine(vectors, random_labels, centroids) + print(f"\nRandom assignment baseline: L2={random_quality:.4f}, cos={random_cosine:.4f}") if __name__ == "__main__": diff --git a/python/scripts/bench_exact.py b/python/scripts/bench_exact.py index 11998332d..f141133cc 100644 --- a/python/scripts/bench_exact.py +++ b/python/scripts/bench_exact.py @@ -2,116 +2,284 @@ """ USearch Exact Search Benchmarking -Benchmarks exact nearest neighbor search performance comparing USearch -implementations against FAISS baseline with various data types and metrics. +Benchmarks exact (brute-force) nearest neighbor search. +Supports comma-separated --dtype, --metric, and --backend for cross-product sweeps. 
+ +Development setup (once): + uv venv && source .venv/bin/activate + uv pip install -e . --force-reinstall + uv pip install faiss-cpu numkong # optional Usage: - uv run python/scripts/bench_exact.py --help - uv run python/scripts/bench_exact.py --count 10000 --ndim 256 - -Dependencies listed in the script header for uv to resolve automatically. + python python/scripts/bench_exact.py --dtype f32,f16 --backend usearch,faiss -k 10 + python python/scripts/bench_exact.py --dtype f32 --backend usearch,numkong -k 100 """ # /// script # dependencies = [ # "numpy", -# "faiss-cpu", # "usearch", # "tqdm" # ] # /// + import argparse -from time import time -from typing import Literal +import sys +from dataclasses import dataclass +from time import perf_counter -from faiss import knn, METRIC_L2, METRIC_INNER_PRODUCT +import numpy as np import usearch -from usearch.compiled import hardware_acceleration -from usearch.eval import random_vectors - -# Supplementary imports for CLI arguments normalization +from usearch import hardware_acceleration from usearch.index import ( - ScalarKind, MetricKind, - search, - _normalize_metric, _normalize_dtype, + _normalize_metric, + search, ) +SUPPORTED_DTYPES = {"b1", "bits", "i8", "u8", "f16", "bf16", "f32", "f64", "e4m3", "e3m2", "e2m3"} +SUPPORTED_METRICS = {"ip", "cos", "l2sq"} +SUPPORTED_BACKENDS = {"usearch", "faiss", "numkong"} + + +@dataclass +class ExactBenchResult: + library: str + dtype: str + metric: str + duration_seconds: float + queries_per_second: float + + +def _parse_csv(value: str) -> list[str]: + return [v.strip() for v in value.split(",") if v.strip()] -def format_duration(duration): - """Format duration in seconds to milliseconds, nicely formatted.""" - return f"{duration * 1000:,.2f} ms" +def _data_category(dtype: str) -> str: + if dtype in ("b1", "bits"): + return "bits" + if dtype in ("u8", "uint8"): + return "uint8" + if dtype in ("i8", "int8"): + return "int8" + return "float32" -def calculate_throughput(duration, 
count): - """Calculate and return throughput as calls per second.""" - if duration > 0: - return f"{count / duration:,.2f} calls/sec" - return "Inf calls/sec" +def _generate_vectors(count: int, dimensions: int, category: str) -> np.ndarray: + if category == "bits": + bits = np.random.randint(2, size=(count, dimensions)) + return np.packbits(bits, axis=1).astype(np.uint8) + if category == "uint8": + return np.random.randint(0, 256, size=(count, dimensions)).astype(np.uint8) + if category == "int8": + return np.random.randint(-128, 128, size=(count, dimensions)).astype(np.int8) + vectors = np.random.randn(count, dimensions).astype(np.float32) + vectors /= np.linalg.norm(vectors, axis=1, keepdims=True) + return vectors -def run( - n: int = 10**5, - q: int = 10, - k: int = 100, - ndim: int = 256, - dtype: Literal["b1", "i8", "f16", "bf16", "f32", "f64"] = "f32", - metric: Literal["ip", "cos", "l2sq"] = "ip", -): - metric: MetricKind = _normalize_metric(metric) - dtype: ScalarKind = _normalize_dtype(dtype, ndim=ndim, metric=metric) - acceleration = hardware_acceleration( +def _run_usearch( + haystack: np.ndarray, + queries: np.ndarray, + neighbors_count: int, + metric_kind: MetricKind, + queries_count: int, + dtype: str, + metric: str, +) -> ExactBenchResult: + start_time = perf_counter() + _result = search(haystack, queries, neighbors_count, metric=metric_kind, exact=True).keys + elapsed = perf_counter() - start_time + return ExactBenchResult( + library="USearch", dtype=dtype, - ndim=ndim, - metric_kind=metric, + metric=metric, + duration_seconds=elapsed, + queries_per_second=queries_count / elapsed if elapsed > 0 else float("inf"), ) - print(f"Hardware acceleration in USearch: {acceleration}") - x = random_vectors(n, ndim=ndim, dtype=dtype) - start = time() - _ = search( - x, - x[:q], - k, +def _run_faiss( + haystack: np.ndarray, + queries: np.ndarray, + neighbors_count: int, + metric_kind: MetricKind, + queries_count: int, + dtype: str, + metric: str, +) -> 
ExactBenchResult: + try: + from faiss import METRIC_INNER_PRODUCT, METRIC_L2, knn + except ImportError: + print("FAISS not available. Install: uv pip install faiss-cpu", file=sys.stderr) + sys.exit(1) + + if metric_kind not in (MetricKind.L2sq, MetricKind.IP): + raise ValueError(f"FAISS only supports l2sq and ip metrics, got {metric_kind}") + + faiss_metric = METRIC_L2 if metric_kind == MetricKind.L2sq else METRIC_INNER_PRODUCT + haystack_float32 = haystack.astype(np.float32) if haystack.dtype != np.float32 else haystack + queries_float32 = queries.astype(np.float32) if queries.dtype != np.float32 else queries + + start_time = perf_counter() + _distances, _ids = knn(queries_float32, haystack_float32, neighbors_count, metric=faiss_metric) + elapsed = perf_counter() - start_time + return ExactBenchResult( + library="FAISS", + dtype=dtype, metric=metric, - exact=True, - ).keys - duration = time() - start - print(f"USearch: {format_duration(duration)} ({calculate_throughput(duration, q)})") + duration_seconds=elapsed, + queries_per_second=queries_count / elapsed if elapsed > 0 else float("inf"), + ) - if metric not in [MetricKind.L2sq, MetricKind.IP]: - return - if dtype not in [ScalarKind.I8, ScalarKind.F16, ScalarKind.BF16, ScalarKind.F32, ScalarKind.F64]: - return - start = time() - faiss_metric = METRIC_L2 if metric == "l2sq" else METRIC_INNER_PRODUCT - _ = knn(x[:q], x, k, metric=faiss_metric)[1] - duration = time() - start - print(f"FAISS: {format_duration(duration)} ({calculate_throughput(duration, q)})") +def _run_numkong( + haystack: np.ndarray, + queries: np.ndarray, + neighbors_count: int, + metric_kind: MetricKind, + queries_count: int, + dtype: str, + metric: str, +) -> ExactBenchResult: + try: + import numkong as nk + except ImportError: + print("NumKong not available. 
Install: uv pip install numkong", file=sys.stderr) + sys.exit(1) + + nk_metric_map = { + MetricKind.L2sq: "sqeuclidean", + MetricKind.IP: "inner", + MetricKind.Cos: "angular", + } + nk_metric = nk_metric_map.get(metric_kind) + if nk_metric is None: + raise ValueError(f"NumKong does not support metric {metric_kind}") + + haystack_float32 = haystack.astype(np.float32) if haystack.dtype != np.float32 else haystack + queries_float32 = queries.astype(np.float32) if queries.dtype != np.float32 else queries + + start_time = perf_counter() + distance_matrix = nk.cdist(queries_float32, haystack_float32, metric=nk_metric) + _found_neighbors = np.argpartition(np.asarray(distance_matrix), neighbors_count, axis=1)[:, :neighbors_count] + elapsed = perf_counter() - start_time + return ExactBenchResult( + library="NumKong", + dtype=dtype, + metric=metric, + duration_seconds=elapsed, + queries_per_second=queries_count / elapsed if elapsed > 0 else float("inf"), + ) + + +_BACKEND_RUNNERS = { + "usearch": _run_usearch, + "faiss": _run_faiss, + "numkong": _run_numkong, +} + + +def _print_results(results: list[ExactBenchResult]) -> None: + if not results: + return + header = f"{'Library':<10} {'Dtype':<10} {'Metric':<8} {'Duration':>14} {'Throughput':>18}" + print(f"\n{header}") + print("-" * len(header)) + for result in results: + print( + f"{result.library:<10} {result.dtype:<10} {result.metric:<8} " + f"{result.duration_seconds * 1000:>11,.2f} ms {result.queries_per_second:>14,.0f} q/s" + ) def main(): - parser = argparse.ArgumentParser(description="Compare KMeans clustering algorithms") - parser.add_argument("--ndim", default=256, type=int, help="Number of vector dimensions") - parser.add_argument("-n", default=10**5, type=int, help="Number of random vectors in a haystack") - parser.add_argument("-q", default=10, type=int, help="Number of query vectors") - parser.add_argument("-k", default=100, type=int, required=True, help="Number of closest neighbors to search for") - 
parser.add_argument("--dtype", type=str, choices=["b1", "i8", "f16", "bf16", "f32", "f64"], default="f32") - parser.add_argument("--metric", type=str, choices=["ip", "cos", "l2sq"], default="ip") + parser = argparse.ArgumentParser( + description="Benchmark exact nearest neighbor search", + epilog=( + "Comma-separated lists supported for --dtype, --metric, --backend.\n" + "Examples:\n" + " bench_exact.py --dtype f32,f16 --backend usearch,faiss -k 10\n" + " bench_exact.py --dtype f32 --backend usearch,numkong -k 100" + ), + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument("-d", "--dimensions", default=256, type=int, help="Number of dimensions (default: 256)") + parser.add_argument("-n", "--count", default=10**5, type=int, help="Number of vectors (default: 100000)") + parser.add_argument("-nq", "--queries-count", default=10, type=int, help="Number of queries (default: 10)") + parser.add_argument("-k", "--neighbors-count", default=100, type=int, help="Number of neighbors (default: 100)") + parser.add_argument( + "--dtype", + default="f32", + help=f"Data type(s), comma-separated (default: f32). Choices: {', '.join(sorted(SUPPORTED_DTYPES))}", + ) + parser.add_argument( + "--metric", + default="ip", + help=f"Metric(s), comma-separated (default: ip). Choices: {', '.join(sorted(SUPPORTED_METRICS))}", + ) + parser.add_argument( + "--backend", + default="usearch", + help=f"Backend(s), comma-separated (default: usearch). Choices: {', '.join(sorted(SUPPORTED_BACKENDS))}", + ) args = parser.parse_args() - run( - n=args.n, - q=args.q, - k=args.k, - ndim=args.ndim, - dtype=args.dtype, - metric=args.metric, - ) + + dtypes = _parse_csv(args.dtype) + metrics = _parse_csv(args.metric) + backends = _parse_csv(args.backend) + + for dtype in dtypes: + if dtype not in SUPPORTED_DTYPES: + parser.error(f"Unknown dtype '{dtype}'. 
Choices: {', '.join(sorted(SUPPORTED_DTYPES))}") + for metric in metrics: + if metric not in SUPPORTED_METRICS: + parser.error(f"Unknown metric '{metric}'. Choices: {', '.join(sorted(SUPPORTED_METRICS))}") + for backend in backends: + if backend not in SUPPORTED_BACKENDS: + parser.error(f"Unknown backend '{backend}'. Choices: {', '.join(sorted(SUPPORTED_BACKENDS))}") + + # Library versions + print(f"USearch v{usearch.VERSION_MAJOR}.{usearch.VERSION_MINOR}.{usearch.VERSION_PATCH}") + print(f" Compiled ISA: {usearch.hardware_acceleration_compiled()}") + print(f" Available ISA: {usearch.hardware_acceleration_available()}") + + # Generate datasets per category + queries_count = min(args.queries_count, args.count) + categories_needed = {_data_category(dtype) for dtype in dtypes} + datasets = {} + print(f"Generating: {args.count:,} vectors, {args.dimensions} dims, k={args.neighbors_count}") + for category in sorted(categories_needed): + vectors = _generate_vectors(args.count, args.dimensions, category) + datasets[category] = (vectors, vectors[:queries_count]) + print(f" {category}: {vectors.shape}, {vectors.dtype}") + + # Build configurations and run (Ctrl+C prints partial table) + configurations = [(backend, dtype, metric) for backend in backends for dtype in dtypes for metric in metrics] + + results: list[ExactBenchResult] = [] + for backend, dtype, metric in configurations: + category = _data_category(dtype) + haystack, queries = datasets[category] + metric_kind = _normalize_metric(metric) + + print(f"\n--- {backend} / {dtype} / {metric} ---") + scalar_kind = _normalize_dtype(dtype, ndim=args.dimensions, metric=metric_kind) + acceleration = hardware_acceleration(dtype=scalar_kind, ndim=args.dimensions, metric_kind=metric_kind) + print(f" Acceleration: {acceleration}") + + runner = _BACKEND_RUNNERS[backend] + try: + results.append(runner(haystack, queries, args.neighbors_count, metric_kind, queries_count, dtype, metric)) + except KeyboardInterrupt: + 
print("\n\nInterrupted โ€” printing results collected so far.") + break + except Exception as exception: + print(f" Failed: {exception}") + + _print_results(results) if __name__ == "__main__": diff --git a/python/scripts/bench_index.py b/python/scripts/bench_index.py new file mode 100644 index 000000000..2ddc0d9b1 --- /dev/null +++ b/python/scripts/bench_index.py @@ -0,0 +1,371 @@ +#!/usr/bin/env -S uv run --quiet --script +""" +USearch HNSW Index Benchmarking + +Benchmarks HNSW approximate nearest neighbor search configurations. +Supports comma-separated --dtype, --metric, and --backend for cross-product sweeps. + +Development setup (once): + uv venv && source .venv/bin/activate + uv pip install -e . --force-reinstall + uv pip install faiss-cpu # optional, for --backend faiss + +Development usage (uses local build from .venv): + python python/scripts/bench_index.py --dtype f32,f16,i8 -n 100000 -k 10 + python python/scripts/bench_index.py --dtype f32,b1 --metric ip,hamming --backend usearch,faiss + +Standalone usage (installs usearch from PyPI automatically): + uv run python/scripts/bench_index.py --dtype f32 -n 100000 -k 10 + +Examples: + bench_index.py --dtype f32,f16,i8 -n 100000 --neighbors-count 10 + bench_index.py --dtype f32,fp16 --backend usearch,faiss -n 100000 --neighbors-count 100 + bench_index.py --dtype b1 --metric hamming -n 100000 --dimensions 512 + bench_index.py --dtype i8 --vectors data/base.fbin --queries data/query.fbin +""" +# /// script +# dependencies = [ +# "numpy", +# "usearch", +# "tqdm" +# ] +# /// + +import argparse +import sys +from dataclasses import dataclass +from time import perf_counter + +import numpy as np + +import usearch +from usearch.eval import random_vectors +from usearch.index import ( + DEFAULT_CONNECTIVITY, + DEFAULT_EXPANSION_ADD, + DEFAULT_EXPANSION_SEARCH, + Index, + Key, + _normalize_dtype, + _normalize_metric, +) +from usearch.io import load_matrix + +SPATIAL_METRICS = {"ip", "cos", "l2sq"} +BITWISE_METRICS = 
{"hamming", "tanimoto", "jaccard", "sorensen"} +ALL_METRICS = SPATIAL_METRICS | BITWISE_METRICS + +USEARCH_DENSE_DTYPES = {"f64", "f32", "f16", "bf16", "e5m2", "e4m3", "e3m2", "e2m3", "i8", "u8"} +USEARCH_BINARY_DTYPES = {"b1", "bits"} + +# FAISS dtypes imported lazily from index_faiss.py when needed +_FAISS_DENSE_DTYPES = {"f32", "f16", "fp16", "bf16", "i8", "int8", "u8", "uint8"} +_FAISS_BINARY_DTYPES = {"b1", "bits"} + + +@dataclass +class BenchConfig: + """Single benchmark configuration.""" + + dtype: str + metric: str + backend: str + connectivity: int + expansion_add: int + expansion_search: int + neighbors_count: int + + +@dataclass +class BenchResult: + """Single benchmark result row.""" + + library: str + dtype: str + metric: str + acceleration: str + add_per_second: float + search_per_second: float + recall: float + neighbors_count: int + + +def _is_supported(backend: str, dtype: str, metric: str) -> bool: + """Check if a (backend, dtype, metric) combination is valid.""" + is_bitwise = metric in BITWISE_METRICS + if backend == "usearch": + valid_dtypes = USEARCH_BINARY_DTYPES if is_bitwise else USEARCH_DENSE_DTYPES + elif backend == "faiss": + valid_dtypes = _FAISS_BINARY_DTYPES if is_bitwise else _FAISS_DENSE_DTYPES + if is_bitwise and metric != "hamming": + return False + else: + return False + return dtype in valid_dtypes + + +def _data_category(dtype: str, metric: str) -> str: + """Return a key grouping dtypes that need the same synthetic data distribution.""" + if dtype in ("b1", "bits") or metric in BITWISE_METRICS: + return "bits" + if dtype in ("u8", "uint8"): + return "uint8" + if dtype in ("i8", "int8"): + return "int8" + return "float32" + + +def _recall_at_k(found_keys: np.ndarray, ground_truth: np.ndarray) -> float: + """Fraction of queries where any true neighbor appears in the top-k results.""" + hits = 0 + for i in range(found_keys.shape[0]): + if any(key in ground_truth[i] for key in found_keys[i]): + hits += 1 + return hits / 
found_keys.shape[0] + + +def _parse_csv(value: str) -> list[str]: + """Split a comma-separated string into a list of stripped tokens.""" + return [v.strip() for v in value.split(",") if v.strip()] + + +def _create_index(config: BenchConfig, dimensions: int): + """Create a USearch or FAISS index based on the config. Returns an index with add/search/hardware_acceleration.""" + is_binary = config.metric in BITWISE_METRICS + + if config.backend == "usearch": + metric_kind = _normalize_metric(config.metric) + ndim = dimensions * 8 if is_binary else dimensions + scalar_kind = _normalize_dtype(config.dtype, ndim=ndim, metric=metric_kind) + return Index( + ndim=ndim, + metric=metric_kind, + dtype=scalar_kind, + connectivity=config.connectivity, + expansion_add=config.expansion_add, + expansion_search=config.expansion_search, + ) + + if config.backend == "faiss": + from index_faiss import IndexFAISS + + return IndexFAISS( + dimensions=dimensions, + metric=config.metric, + dtype=config.dtype, + connectivity=config.connectivity, + expansion_add=config.expansion_add, + expansion_search=config.expansion_search, + ) + + raise ValueError(f"Unknown backend: {config.backend}") + + +def _run_benchmark( + dtype_name: str, + vectors: np.ndarray, + queries: np.ndarray, + ground_truth: np.ndarray | None, + config: BenchConfig, +) -> BenchResult: + """Run a single benchmark configuration. 
Returns a BenchResult.""" + dimensions = vectors.shape[1] + index = _create_index(config, dimensions) + keys = np.arange(vectors.shape[0], dtype=Key) + + start = perf_counter() + index.add(keys, vectors, log=True, dtype=dtype_name) + add_elapsed = perf_counter() - start + + start = perf_counter() + matches = index.search(queries, config.neighbors_count, log=True, dtype=dtype_name) + search_elapsed = perf_counter() - start + + found_keys = matches.keys.reshape(-1, config.neighbors_count) + recall = _recall_at_k(found_keys, ground_truth) if ground_truth is not None else float("nan") + + return BenchResult( + library="FAISS" if config.backend == "faiss" else "USearch", + dtype=config.dtype, + metric=config.metric, + acceleration=index.hardware_acceleration, + add_per_second=vectors.shape[0] / add_elapsed if add_elapsed > 0 else float("inf"), + search_per_second=queries.shape[0] / search_elapsed if search_elapsed > 0 else float("inf"), + recall=recall, + neighbors_count=config.neighbors_count, + ) + + +def _print_results(results: list[BenchResult]) -> None: + if not results: + return + k = results[0].neighbors_count + recall_col = f"Recall@{k}" + header = ( + f"{'Library':<10} {'Quantization':<14} {'Metric':<10} {'Acceleration':<14} " + f"{'Add/s':>12} {'Search/s':>12} {recall_col:>10}" + ) + print(f"\n{header}") + print("-" * len(header)) + for result in results: + recall_str = f"{result.recall * 100:.1f}%" if not np.isnan(result.recall) else "N/A" + print( + f"{result.library:<10} {result.dtype:<14} {result.metric:<10} {result.acceleration:<14} " + f"{result.add_per_second:>12,.0f} {result.search_per_second:>12,.0f} {recall_str:>10}" + ) + + +def main(): + all_dtypes = sorted(USEARCH_DENSE_DTYPES | USEARCH_BINARY_DTYPES | _FAISS_DENSE_DTYPES | _FAISS_BINARY_DTYPES) + parser = argparse.ArgumentParser( + description="Benchmark HNSW approximate search configurations", + epilog=( + "Comma-separated lists supported for --dtype, --metric, --backend.\n" + "Examples:\n" + " 
bench_index.py --dtype f32,f16,i8 --metric ip -n 100000 --neighbors-count 10\n" + " bench_index.py --dtype b1 --metric hamming -n 100000 --dimensions 512\n" + " bench_index.py --dtype f32,b1 --metric ip,hamming --backend usearch,faiss" + ), + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + + # Data source + parser.add_argument("--vectors", type=str, help="Path to base vectors file (.fbin, .hbin, .i8bin, .u8bin)") + parser.add_argument("--queries", type=str, help="Path to query vectors file") + parser.add_argument("--neighbors", type=str, help="Path to ground truth neighbors file (.ibin)") + + # Synthetic data parameters + parser.add_argument("-n", "--count", default=100_000, type=int, help="Number of vectors (default: 100000)") + parser.add_argument("-d", "--dimensions", default=256, type=int, help="Number of dimensions (default: 256)") + parser.add_argument("-nq", "--queries-count", default=1000, type=int, help="Number of queries (default: 1000)") + + # Configuration โ€” all accept comma-separated lists + parser.add_argument( + "--dtype", + default="f32", + help=f"Quantization type(s), comma-separated (default: f32). Choices: {', '.join(all_dtypes)}", + ) + parser.add_argument( + "--metric", + default="ip", + help=f"Distance metric(s), comma-separated (default: ip). Choices: {', '.join(sorted(ALL_METRICS))}", + ) + parser.add_argument( + "--backend", + default="usearch", + help="Backend(s), comma-separated (default: usearch). 
Choices: usearch, faiss", + ) + parser.add_argument("-k", "--neighbors-count", default=10, type=int, help="Number of neighbors (default: 10)") + + # HNSW parameters + parser.add_argument("-c", "--connectivity", type=int, default=DEFAULT_CONNECTIVITY, help="HNSW connectivity (M)") + parser.add_argument("--expansion-add", type=int, default=DEFAULT_EXPANSION_ADD) + parser.add_argument("--expansion-search", type=int, default=DEFAULT_EXPANSION_SEARCH) + + args = parser.parse_args() + + # Parse comma-separated lists + dtypes = _parse_csv(args.dtype) + metrics = _parse_csv(args.metric) + backends = _parse_csv(args.backend) + + # Validate + for dtype in dtypes: + if dtype not in all_dtypes: + parser.error(f"Unknown dtype '{dtype}'. Choices: {', '.join(all_dtypes)}") + for metric in metrics: + if metric not in ALL_METRICS: + parser.error(f"Unknown metric '{metric}'. Choices: {', '.join(sorted(ALL_METRICS))}") + for backend in backends: + if backend not in ("usearch", "faiss"): + parser.error(f"Unknown backend '{backend}'. Choices: usearch, faiss") + + # Auto-add hamming metric when b1 dtype requested without any bitwise metric + if any(dtype in ("b1", "bits") for dtype in dtypes) and not any(metric in BITWISE_METRICS for metric in metrics): + metrics.append("hamming") + + # Library versions and hardware info + print(f"USearch v{usearch.VERSION_MAJOR}.{usearch.VERSION_MINOR}.{usearch.VERSION_PATCH}") + print(f" Compiled ISA: {usearch.hardware_acceleration_compiled()}") + print(f" Available ISA: {usearch.hardware_acceleration_available()}") + if "faiss" in backends: + try: + from index_faiss import hardware_acceleration_available as faiss_isa + from index_faiss import version as faiss_version + + print(f"FAISS v{faiss_version()} (ISA: {faiss_isa()})") + except ImportError: + print("FAISS not available. 
Install: uv pip install faiss-cpu", file=sys.stderr) + backends = [backend for backend in backends if backend != "faiss"] + + # Build valid (backend, dtype, metric) configurations + configurations = [ + BenchConfig( + dtype=dtype, + metric=metric, + backend=backend, + connectivity=args.connectivity, + expansion_add=args.expansion_add, + expansion_search=args.expansion_search, + neighbors_count=args.neighbors_count, + ) + for backend in backends + for dtype in dtypes + for metric in metrics + if _is_supported(backend, dtype, metric) + ] + + # Load real data or generate synthetic datasets per dtype category + if args.vectors: + print(f"Loading vectors from {args.vectors}") + base_vectors = load_matrix(args.vectors) + query_vectors = ( + load_matrix(args.queries) + if args.queries + else base_vectors[: min(args.queries_count, base_vectors.shape[0])] + ) + ground_truth = load_matrix(args.neighbors) if args.neighbors else None + datasets = None + print(f"Loaded: {base_vectors.shape[0]:,} x {base_vectors.shape[1]}, k={args.neighbors_count}") + else: + queries_count = min(args.queries_count, args.count) + categories_needed = {_data_category(config.dtype, config.metric) for config in configurations} + datasets = {} + print( + f"Generating synthetic datasets: {args.count:,} vectors, {args.dimensions} dims, k={args.neighbors_count}" + ) + for category in sorted(categories_needed): + scalar_kind = _normalize_dtype({"float32": "f32", "uint8": "u8", "int8": "i8", "bits": "b1"}[category]) + metric_kind = _normalize_metric("hamming" if category == "bits" else "ip") + vectors = random_vectors( + count=args.count, ndim=args.dimensions, metric=metric_kind, quantization=scalar_kind + ) + ground_truth_self = np.arange(queries_count, dtype=np.int64).reshape(-1, 1) + datasets[category] = (vectors, vectors[:queries_count], ground_truth_self) + print(f" {category}: {vectors.shape}, {vectors.dtype}") + base_vectors = query_vectors = ground_truth = None + + # Run benchmarks (Ctrl+C prints 
partial table) + results: list[BenchResult] = [] + for config in configurations: + if datasets is not None: + category = _data_category(config.dtype, config.metric) + vectors, queries, ground_truth = datasets[category] + else: + category = "float32" + vectors, queries = base_vectors, query_vectors + + print(f"\n--- {config.backend} / {config.dtype} / {config.metric} ---") + try: + results.append(_run_benchmark(category, vectors, queries, ground_truth, config)) + except KeyboardInterrupt: + print("\n\nInterrupted โ€” printing results collected so far.") + break + except Exception as exception: + print(f" Failed: {exception}") + + _print_results(results) + + +if __name__ == "__main__": + main() diff --git a/python/scripts/cluster.ipynb b/python/scripts/cluster.ipynb deleted file mode 100644 index ff45e7454..000000000 --- a/python/scripts/cluster.ipynb +++ /dev/null @@ -1,160 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from usearch.io import load_matrix\n", - "import numpy as np" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "vectors = load_matrix(\"datasets/arxiv_2M/abstract.e5-base-v2.fbin\", view=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "vectors.shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from usearch.index import Index, USES_SIMSIMD, USES_FP16LIB\n", - "\n", - "index = Index(ndim=vectors.shape[1], metric=\"cos\", dtype=\"i8\")\n", - "index.hardware_acceleration, USES_SIMSIMD, USES_FP16LIB" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "import os\n", - "\n", - "if os.path.exists(\"cluster.i8.usearch\"):\n", - " index.load(\"cluster.i8.usearch\")\n", - "\n", - "if len(index) == 0:\n", 
- " index.add(None, vectors, log=True)\n", - " index.save(\"cluster.i8.usearch\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "index" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "clustering = index.cluster(min_count=10, max_count=15, threads=60)\n", - "clustering" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(f\"{len(clustering.centroids_popularity[0])} unique clusters for {len(index)} members\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "clustering.plot_centroids_popularity()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "nx.draw_networkx(g)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import matplotlib.pyplot as plt\n", - "import networkx as nx\n", - "\n", - "G = g\n", - "\n", - "node_sizes = nx.get_node_attributes(G, \"size\").values()\n", - "max_node_size = max(node_sizes)\n", - "node_sizes = [size * 10 / max_node_size for size in node_sizes]\n", - "\n", - "edge_labels = nx.get_edge_attributes(G, \"distance\")\n", - "edge_labels = {edge: f\"{label:.2}\" for edge, label in edge_labels.items()}\n", - "\n", - "pos = nx.spring_layout(G, seed=7)\n", - "nx.draw_networkx_nodes(G, pos, node_size=node_sizes)\n", - "nx.draw_networkx_edges(G, pos, edgelist=G.edges(data=False))\n", - "nx.draw_networkx_labels(G, pos, font_size=10, font_family=\"sans-serif\")\n", - "nx.draw_networkx_edge_labels(G, pos, edge_labels, font_size=5)\n", - "\n", - "ax = plt.gca()\n", - "ax.margins(0.08)\n", - "plt.axis(\"off\")\n", - "plt.tight_layout()\n", - "plt.show()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "base", - "language": "python", - "name": 
"python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.11" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/python/scripts/index_faiss.py b/python/scripts/index_faiss.py index a54438f42..f3288ab75 100644 --- a/python/scripts/index_faiss.py +++ b/python/scripts/index_faiss.py @@ -1,121 +1,185 @@ -#!/usr/bin/env -S uv run --quiet --script """ -FAISS Index Wrapper for USearch Benchmarking +FAISS HNSW Index Wrapper -Provides FAISS index implementations compatible with USearch benchmarking framework -for performance comparison between USearch and FAISS libraries. - -Usage: - uv run python/scripts/index_faiss.py - -Dependencies listed in the script header for uv to resolve automatically. +Provides a USearch-compatible interface around FAISS HNSW indices for +benchmarking. Supports dense (IndexHNSWFlat/IndexHNSWSQ) and binary +(IndexBinaryHNSW) index types with batched tqdm progress bars. """ -# /// script -# dependencies = [ -# "numpy", -# "faiss-cpu", -# "usearch" -# ] -# /// -import os -from typing import Optional +from dataclasses import dataclass +import faiss import numpy as np -from faiss import IndexHNSWFlat, IndexIVFPQ, read_index +from tqdm import tqdm -import usearch -from usearch.index import BatchMatches -from usearch.index import ( - DEFAULT_CONNECTIVITY, - DEFAULT_EXPANSION_ADD, - DEFAULT_EXPANSION_SEARCH, -) +# FAISS dtype โ†’ ScalarQuantizer type mapping (no learnable quantizers). 
+_SCALAR_QUANTIZER_MAP: dict[str, int | None] = { + "f32": None, + "f16": faiss.ScalarQuantizer.QT_fp16, + "fp16": faiss.ScalarQuantizer.QT_fp16, + "bf16": faiss.ScalarQuantizer.QT_bf16, + "u8": faiss.ScalarQuantizer.QT_8bit_direct, + "uint8": faiss.ScalarQuantizer.QT_8bit_direct, + "i8": faiss.ScalarQuantizer.QT_8bit_direct_signed, + "int8": faiss.ScalarQuantizer.QT_8bit_direct_signed, +} +DENSE_DTYPES = set(_SCALAR_QUANTIZER_MAP.keys()) +BINARY_DTYPES = {"b1", "bits"} +ALL_DTYPES = DENSE_DTYPES | BINARY_DTYPES -class IndexFAISS: - def __init__( - self, - index: IndexHNSWFlat = None, - ndim: int = 0, - connectivity: int = DEFAULT_CONNECTIVITY, - expansion_add: int = DEFAULT_EXPANSION_ADD, - expansion_search: int = DEFAULT_EXPANSION_SEARCH, - path: Optional[os.PathLike] = None, - *args, - **kwargs, - ): - if index is None: - index = IndexHNSWFlat(ndim, connectivity) - index.hnsw.efConstruction = expansion_add - index.hnsw.efSearch = expansion_search - - self._faiss = index - self._specs = { - "Class": "usearch.IndexFAISS", - "Dimensions": ndim, - "Connectivity": connectivity, - "Expansion@Add": expansion_add, - "Expansion@Search": expansion_search, - } - - self.path = path - - def add(self, keys, vectors): - # Adding keys isn't supported for most index types - # self._faiss.add_with_ids(vectors, keys) - self._faiss.add(vectors) - - def search(self, queries, k: int) -> BatchMatches: - distances, keys = self._faiss.search(queries, k) - return BatchMatches(keys, distances, np.array([k] * queries.shape[0])) +BITWISE_METRICS = {"hamming", "tanimoto", "jaccard", "sorensen"} - def __len__(self) -> int: - return self._faiss.ntotal - def clear(self): - self._faiss.reset() +def hardware_acceleration() -> str: + """Detect which SIMD backend FAISS loaded at import time.""" + if hasattr(faiss, "_swigfaiss_avx512"): + return "avx512" + if hasattr(faiss, "_swigfaiss_avx2"): + return "avx2" + return "generic" - @property - def specs(self) -> dict: - self._specs.update( - { - 
"Size": len(self), - } - ) - return self._specs - def load(self, path: os.PathLike): - self._faiss = read_index(path) +def hardware_acceleration_available() -> str: + """Comma-separated list of SIMD capabilities available in this FAISS build.""" + flags = [] + if getattr(faiss, "has_AVX512", False): + flags.append("avx512") + if getattr(faiss, "has_AVX512_SPR", False): + flags.append("avx512-spr") + if getattr(faiss, "has_AVX2", False): + flags.append("avx2") + return ", ".join(flags) if flags else "generic" + + +def version() -> str: + """FAISS version string.""" + return getattr(faiss, "__version__", "unknown") + +@dataclass +class FaissMatches: + """Match result compatible with USearch BatchMatches for recall computation.""" + + keys: np.ndarray + distances: np.ndarray + + +class IndexFAISS: + """FAISS HNSW wrapper matching the USearch Index interface for benchmarking. + + Keys passed to :meth:`add` are ignored โ€” FAISS assigns sequential IDs + starting from 0. The :meth:`search` return type mimics USearch's + ``BatchMatches`` with ``.keys`` and ``.distances`` attributes. + """ -class IndexQuantizedFAISS(IndexFAISS): def __init__( self, - train: np.ndarray, - connectivity: int = DEFAULT_CONNECTIVITY, - expansion_add: int = DEFAULT_EXPANSION_ADD, - expansion_search: int = DEFAULT_EXPANSION_SEARCH, - *args, - **kwargs, + dimensions: int, + metric: str, + dtype: str = "f32", + connectivity: int = 16, + expansion_add: int = 128, + expansion_search: int = 64, ): - ndim = train.shape[1] - super().__init__( - ndim=ndim, - connectivity=connectivity, - expansion_add=expansion_add, - expansion_search=expansion_search, - *args, - **kwargs, - ) - - nlist = 10000 # Number of inverted lists (number of partitions). - nsegment = 16 # Number of segments for PQ (number of subquantizers). - nbit = 8 # Number of bits to encode each segment. 
- - self._original_faiss = self._faiss - self._faiss = IndexIVFPQ(self._faiss, ndim, nlist, nsegment, nbit) - - self._faiss.train(train) - self._faiss.nprobe = 10 - self._specs["Class"] = "usearch.IndexQuantizedFAISS" + self.dimensions = dimensions + self.metric_name = metric + self.dtype = dtype + self._is_binary = metric in BITWISE_METRICS + + if self._is_binary: + dimensions_bits = dimensions * 8 if dtype in ("b1", "bits") else dimensions + self._index = faiss.IndexBinaryHNSW(dimensions_bits, connectivity) + else: + if dtype not in _SCALAR_QUANTIZER_MAP: + raise ValueError( + f"FAISS does not support dtype '{dtype}'. Supported: {', '.join(sorted(_SCALAR_QUANTIZER_MAP))}" + ) + faiss_metric = faiss.METRIC_L2 if metric == "l2sq" else faiss.METRIC_INNER_PRODUCT + sq_type = _SCALAR_QUANTIZER_MAP[dtype] + if sq_type is None: + self._index = faiss.IndexHNSWFlat(dimensions, connectivity, faiss_metric) + else: + self._index = faiss.IndexHNSWSQ(dimensions, sq_type, connectivity, faiss_metric) + + self._index.hnsw.efConstruction = expansion_add + self._index.hnsw.efSearch = expansion_search + + def add(self, _keys, vectors: np.ndarray, *, log: bool = False, dtype: str = "") -> None: + """Add vectors to the index. + + :param _keys: Ignored. FAISS uses sequential IDs internally. + :param vectors: Row-major matrix of vectors to add. + :param log: Show tqdm progress bar. + :param dtype: Ignored. Accepted for USearch API compatibility. 
+ """ + if self._is_binary: + data = vectors if vectors.dtype == np.uint8 else vectors.astype(np.uint8) + else: + data = vectors if vectors.dtype == np.float32 else vectors.astype(np.float32) + + count = data.shape[0] + batch_size = max(10_000, count // 20) + + if log: + with tqdm(total=count, desc="Add", unit="vector") as progress_bar: + for start in range(0, count, batch_size): + end = min(start + batch_size, count) + self._index.add(data[start:end]) + progress_bar.update(end - start) + else: + self._index.add(data) + + def search(self, queries: np.ndarray, neighbors_count: int, *, log: bool = False, dtype: str = "") -> FaissMatches: + """Search the index for nearest neighbors. + + :param queries: Row-major matrix of query vectors. + :param neighbors_count: Number of neighbors to retrieve per query. + :param log: Show tqdm progress bar. + :param dtype: Ignored. Accepted for USearch API compatibility. + :return: FaissMatches with ``.keys`` and ``.distances`` arrays. + """ + if self._is_binary: + data = queries if queries.dtype == np.uint8 else queries.astype(np.uint8) + else: + data = queries if queries.dtype == np.float32 else queries.astype(np.float32) + + count = data.shape[0] + batch_size = max(100, count // 20) + + if log: + all_distances = [] + all_ids = [] + with tqdm(total=count, desc="Search", unit="vector") as progress_bar: + for start in range(0, count, batch_size): + end = min(start + batch_size, count) + distances, ids = self._index.search(data[start:end], neighbors_count) + all_distances.append(distances) + all_ids.append(ids) + progress_bar.update(end - start) + return FaissMatches(keys=np.vstack(all_ids), distances=np.vstack(all_distances)) + + distances, ids = self._index.search(data, neighbors_count) + return FaissMatches(keys=ids, distances=distances) + + def save(self, path: str) -> None: + """Save the index to disk.""" + if self._is_binary: + faiss.write_index_binary(self._index, path) + else: + faiss.write_index(self._index, path) + + def 
load(self, path: str) -> None: + """Load the index from disk, replacing the current index.""" + if self._is_binary: + self._index = faiss.read_index_binary(path) + else: + self._index = faiss.read_index(path) + + @property + def hardware_acceleration(self) -> str: + """SIMD backend FAISS is using (e.g., 'avx512', 'avx2', 'generic').""" + return hardware_acceleration() + + def __len__(self) -> int: + return self._index.ntotal diff --git a/python/scripts/join.py b/python/scripts/join.py index 8e3c71f6b..f5998bfd2 100644 --- a/python/scripts/join.py +++ b/python/scripts/join.py @@ -1,150 +1,137 @@ #!/usr/bin/env -S uv run --quiet --script """ -USearch Index Joining Utility +USearch Index Joining Benchmark -Script for joining and benchmarking multimodal datasets (images and texts) -using different embedding models and distance metrics. Includes support for -cross-modal search and evaluation. +Benchmarks cross-modal join and search on multimodal datasets (e.g., images +and texts). Evaluates self-recall, cross-recall, and bipartite join quality. Usage: - uv run python/scripts/join.py - -To download the required datasets, run: -wget -nc https://huggingface.co/datasets/unum-cloud/ann-cc-3m/resolve/main/clip_images.fbin -P datasets/cc_3M/ -wget -nc https://huggingface.co/datasets/unum-cloud/ann-cc-3m/resolve/main/clip_texts.fbin -P datasets/cc_3M/ -wget -nc https://huggingface.co/datasets/unum-cloud/ann-cc-3m/resolve/main/images.fbin -P datasets/cc_3M/ -wget -nc https://huggingface.co/datasets/unum-cloud/ann-cc-3m/resolve/main/texts.fbin -P datasets/cc_3M/ + uv run python/scripts/join.py --help + uv run python/scripts/join.py \\ + --vectors-a datasets/cc_3M/texts.fbin \\ + --vectors-b datasets/cc_3M/images.fbin \\ + --metric cos -n 100000 Dependencies listed in the script header for uv to resolve automatically. 
""" # /// script # dependencies = [ # "numpy", -# "simsimd", +# "numkong", # "usearch", # "tqdm" # ] # /// +import argparse +from time import perf_counter + +import numpy as np from numpy import dot from numpy.linalg import norm from tqdm import tqdm -from simsimd import cos_f32x4_neon, to_int -import usearch -from usearch.index import Index, MetricKind, CompiledMetric, MetricSignature +from usearch.eval import measure_seconds, random_vectors +from usearch.index import CompiledMetric, Index, MetricKind, MetricSignature from usearch.io import load_matrix -from usearch.eval import measure_seconds - -count = 10 -exact = False -batch_size = 1024 * 4 -max_elements = 1000000 - -a_name = "cc_3M/texts" -b_name = "cc_3M/images" - -a_mat = load_matrix(f"datasets/{a_name}.fbin", view=True) -b_mat = load_matrix(f"datasets/{b_name}.fbin", view=True) - -a_mat = a_mat[:max_elements] -b_mat = b_mat[:max_elements] - -print(f"Loaded two datasets of shape: {a_mat.shape}, {b_mat.shape}") -print("--------------------------------------") -print("---------------Indexing---------------") -print("--------------------------------------") - -metric = CompiledMetric( - pointer=to_int(cos_f32x4_neon), - kind=MetricKind.Cos, - signature=MetricSignature.ArrayArraySize, -) - -a = Index( - a_mat.shape[1], - metric=metric, - path=f"datasets/{a_name}-{max_elements}.f32.usearch", - dtype="f32", -) -b = Index( - b_mat.shape[1], - metric=metric, - path=f"datasets/{b_name}-{max_elements}.f32.usearch", - dtype="f32", -) - -if len(a) != a_mat.shape[0]: - a.clear() - a.add(None, a_mat, log=True, batch_size=batch_size) - a.save() - -if len(b) != b_mat.shape[0]: - b.clear() - b.add(None, b_mat, log=True, batch_size=batch_size) - b.save() - - -print(f"Loaded two indexes of size: {len(a):,} for {a_name} and {len(b):,} for {b_name}") -min_elements = min(len(a), len(b)) - -run_diagnostics = input("Would you like to run diagnostics? 
[Y/n]: ") -if len(run_diagnostics) == 0 or run_diagnostics.lower() == "y": - print("--------------------------------------") - print("-------------Diagnostics--------------") - print("--------------------------------------") - - mean_similarity = 0.0 - mean_recovered_similarity = 0.0 - - for i in tqdm(range(min_elements), desc="Pairwise Similarity"): - a_vec = a_mat[i] - b_vec = b_mat[i] - cos_similarity = dot(a_vec, b_vec) / (norm(a_vec) * norm(b_vec)) - mean_similarity += cos_similarity - - a_vec = a[i] - b_vec = b[i] - cos_similarity = dot(a_vec, b_vec) / (norm(a_vec) * norm(b_vec)) - mean_recovered_similarity += cos_similarity - - mean_similarity /= min_elements - mean_recovered_similarity /= min_elements - print( - f"Average vector similarity is {mean_similarity:.4f} in original dataset, " - f"and {mean_recovered_similarity:.4f} in recovered state in index" - ) - - dt = measure_seconds - args = dict( - count=count, - batch_size=batch_size, - log=True, - exact=exact, - ) - - secs, a_self_recall = dt(lambda: a.search(a.vectors, **args).recall(a.keys)) - print("Self-recall @{} of {} index: {:.2f}%, took {:.2f}s".format(count, a_name, a_self_recall * 100, secs)) - secs, b_self_recall = dt(lambda: b.search(b.vectors, **args).recall(b.keys)) - print("Self-recall @{} of {} index: {:.2f}%, took {:.2f}s".format(count, b_name, b_self_recall * 100, secs)) - secs, ab_recall = dt(lambda: b.search(a.vectors, **args).recall(b.keys)) - print("Cross-recall @{} of {} in {}: {:.2f}%, took {:.2f}s".format(count, a_name, b_name, ab_recall * 100, secs)) - - secs, ba_recall = dt(lambda: a.search(b.vectors, **args).recall(a.keys)) - print("Cross-recall @{} of {} in {}: {:.2f}%, took {:.2f}s".format(count, b_name, a_name, ba_recall * 100, secs)) - - -print("--------------------------------------") -print("-----------------Join-----------------") -print("--------------------------------------") - -secs, bimapping = measure_seconds(lambda: a.join(b, max_proposals=100)) -mapping_size = 
len(bimapping) -recall = 0 -for i, j in bimapping.items(): - recall += i == j - -recall *= 100.0 / min_elements -print(f"Took {secs:.2f}s to find {mapping_size:,} pairings with {recall:.2f}% being exact") +def main(): + parser = argparse.ArgumentParser( + description="Benchmark cross-modal index joining and search", + epilog="If --vectors-a/--vectors-b not provided, generates synthetic data.", + ) + parser.add_argument("--vectors-a", type=str, help="Path to first dataset (.fbin)") + parser.add_argument("--vectors-b", type=str, help="Path to second dataset (.fbin)") + parser.add_argument("--metric", type=str, choices=["ip", "cos", "l2sq"], default="cos", help="Distance metric") + parser.add_argument("-n", "--count", type=int, default=100_000, help="Max vectors per dataset (default: 100000)") + parser.add_argument("--ndim", type=int, default=256, help="Dimensions for synthetic data (default: 256)") + parser.add_argument("-k", type=int, default=10, help="Number of neighbors for recall evaluation (default: 10)") + parser.add_argument("--dtype", type=str, default="f32", help="Quantization type (default: f32)") + parser.add_argument("--diagnostics", action="store_true", help="Run self-recall and cross-recall diagnostics") + + args = parser.parse_args() + + # Load or generate data + if args.vectors_a and args.vectors_b: + vectors_a = load_matrix(args.vectors_a, count_rows=args.count) + vectors_b = load_matrix(args.vectors_b, count_rows=args.count) + print(f"Loaded datasets: A={vectors_a.shape}, B={vectors_b.shape}") + else: + print(f"Generating synthetic data: {args.count:,} x {args.ndim}") + vectors_a = random_vectors(args.count, ndim=args.ndim).astype(np.float32) + vectors_b = random_vectors(args.count, ndim=args.ndim).astype(np.float32) + + ndim = vectors_a.shape[1] + min_elements = min(vectors_a.shape[0], vectors_b.shape[0]) + + # Build metric + try: + from numkong import pointer_to_angular + + metric = CompiledMetric( + pointer=pointer_to_angular("f32"), + 
kind=MetricKind.Cos, + signature=MetricSignature.ArrayArraySize, + ) + except ImportError: + metric = MetricKind.Cos + + # Build indexes + print("--- Indexing ---") + index_a = Index(ndim, metric=metric, dtype=args.dtype) + index_b = Index(ndim, metric=metric, dtype=args.dtype) + + index_a.add(None, vectors_a, log=True) + index_b.add(None, vectors_b, log=True) + print(f"Indexed: A={len(index_a):,}, B={len(index_b):,}") + + # Diagnostics + if args.diagnostics: + print("\n--- Diagnostics ---") + + # Pairwise similarity + mean_sim = 0.0 + for i in tqdm(range(min_elements), desc="Pairwise Similarity"): + a_vec, b_vec = vectors_a[i], vectors_b[i] + a_norm, b_norm = norm(a_vec), norm(b_vec) + if a_norm > 0 and b_norm > 0: + mean_sim += dot(a_vec, b_vec) / (a_norm * b_norm) + mean_sim /= min_elements + print(f"Average pairwise cosine similarity: {mean_sim:.4f}") + + search_kwargs = dict(count=args.k, log=True) + + secs, recall_a = measure_seconds( + lambda: index_a.search(vectors_a, **search_kwargs).recall(np.arange(len(index_a))) + ) + print(f"Self-recall @{args.k} of A: {recall_a * 100:.2f}% ({secs:.2f}s)") + + secs, recall_b = measure_seconds( + lambda: index_b.search(vectors_b, **search_kwargs).recall(np.arange(len(index_b))) + ) + print(f"Self-recall @{args.k} of B: {recall_b * 100:.2f}% ({secs:.2f}s)") + + secs, recall_ab = measure_seconds( + lambda: index_b.search(vectors_a, **search_kwargs).recall(np.arange(min_elements)) + ) + print(f"Cross-recall @{args.k} A->B: {recall_ab * 100:.2f}% ({secs:.2f}s)") + + secs, recall_ba = measure_seconds( + lambda: index_a.search(vectors_b, **search_kwargs).recall(np.arange(min_elements)) + ) + print(f"Cross-recall @{args.k} B->A: {recall_ba * 100:.2f}% ({secs:.2f}s)") + + # Join + print("\n--- Join ---") + start_time = perf_counter() + bimapping = index_a.join(index_b, max_proposals=100) + join_elapsed = perf_counter() - start_time + + recall = sum(1 for i, j in bimapping.items() if i == j) + recall_pct = recall * 100.0 / 
min_elements + print(f"Found {len(bimapping):,} pairings in {join_elapsed:.2f}s, {recall_pct:.2f}% exact matches") + + +if __name__ == "__main__": + main() diff --git a/python/scripts/test_distances.py b/python/scripts/test_distances.py index cda9c9899..cda14fe00 100644 --- a/python/scripts/test_distances.py +++ b/python/scripts/test_distances.py @@ -7,7 +7,7 @@ Usage: uv run python/scripts/test_distances.py - + Dependencies listed in the script header for uv to resolve automatically. """ # /// script @@ -18,13 +18,10 @@ # ] # /// -import pytest import numpy as np +import pytest -import usearch from usearch.eval import random_vectors -from usearch.index import search - from usearch.index import ( Index, MetricKind, @@ -37,17 +34,20 @@ [ MetricKind.Cos, MetricKind.L2sq, - MetricKind.Divergence, - MetricKind.Pearson, ], ) @pytest.mark.parametrize( "quantization", [ ScalarKind.F32, - ScalarKind.F16, ScalarKind.BF16, + ScalarKind.F16, + ScalarKind.E5M2, + ScalarKind.E4M3, + ScalarKind.E3M2, + ScalarKind.E2M3, ScalarKind.I8, + ScalarKind.U8, ], ) @pytest.mark.parametrize( @@ -63,8 +63,8 @@ def test_distances_continuous(metric, quantization, dtype): ndim = 1024 try: index = Index(ndim=ndim, metric=metric, dtype=quantization) - vectors = random_vectors(count=2, ndim=ndim, dtype=dtype) keys = np.arange(2) + vectors = random_vectors(count=2, ndim=ndim, metric=metric, quantization=quantization, input_dtype=dtype) index.add(keys, vectors) except ValueError: pytest.skip(f"Unsupported metric `{metric}`, quantization `{quantization}`, dtype `{dtype}`") @@ -77,7 +77,10 @@ def test_distances_continuous(metric, quantization, dtype): distance_itself_second = index.pairwise_distance([1], [1]) distance_different = index.pairwise_distance([0], [1]) - assert not np.allclose(distance_different, 0) + if np.allclose(distance_different, 0): + pytest.skip(f"Quantization `{quantization}` too lossy for `{dtype.__name__}` input at ndim={ndim}") + return + assert 
np.allclose(distance_itself_first, 0, rtol=rtol, atol=atol) assert np.allclose(distance_itself_second, 0, rtol=rtol, atol=atol) @@ -93,7 +96,7 @@ def test_distances_continuous(metric, quantization, dtype): def test_distances_sparse(metric): ndim = 1024 index = Index(ndim=ndim, metric=metric, dtype=ScalarKind.B1) - vectors = random_vectors(count=2, ndim=ndim, dtype=ScalarKind.B1) + vectors = random_vectors(count=2, ndim=ndim, quantization=ScalarKind.B1) keys = np.arange(2) index.add(keys, vectors) diff --git a/python/scripts/test_index.py b/python/scripts/test_index.py index 6a96a19a8..1158a96b0 100644 --- a/python/scripts/test_index.py +++ b/python/scripts/test_index.py @@ -21,32 +21,34 @@ import os from time import time -import pytest import numpy as np +import pytest -from usearch.eval import random_vectors, self_recall, SearchStats +from usearch.eval import SearchStats, random_vectors, self_recall from usearch.index import ( + DEFAULT_CONNECTIVITY, + BatchMatches, + Clustering, Index, - MetricKind, - ScalarKind, Match, Matches, - BatchMatches, - Clustering, -) -from usearch.index import ( - DEFAULT_CONNECTIVITY, + MetricKind, + ScalarKind, ) - ndims = [3, 97, 256] batch_sizes = [1, 11, 77] quantizations = [ - ScalarKind.F32, ScalarKind.F64, - ScalarKind.F16, + ScalarKind.F32, ScalarKind.BF16, + ScalarKind.F16, + ScalarKind.E5M2, + ScalarKind.E4M3, + ScalarKind.E3M2, + ScalarKind.E2M3, ScalarKind.I8, + ScalarKind.U8, ] dtypes = [np.float32, np.float64, np.float16] threads = 2 @@ -458,6 +460,6 @@ def test_index_copied_memory_usage(): memory_with_copy = index_copied.memory_usage memory_without_copy = index_viewing.memory_usage - assert ( - memory_with_copy > memory_without_copy - ), f"Expected default index addition to use more memory than copy=False ({memory_with_copy} vs {memory_without_copy})" + assert memory_with_copy > memory_without_copy, ( + f"Expected default index addition to use more memory than copy=False ({memory_with_copy} vs {memory_without_copy})" + 
) diff --git a/python/scripts/test_jit.py b/python/scripts/test_jit.py index 2bf8ef2cd..f519c8d7e 100644 --- a/python/scripts/test_jit.py +++ b/python/scripts/test_jit.py @@ -18,16 +18,15 @@ # ] # /// -import pytest import numpy as np +import pytest -import usearch from usearch.eval import random_vectors from usearch.index import ( + CompiledMetric, Index, MetricKind, MetricSignature, - CompiledMetric, ) dimensions = [3, 97, 256] @@ -44,7 +43,7 @@ def test_index_numba(ndim: int, batch_size: int): ! Requires the `numba` package to work. """ try: - from numba import cfunc, types, carray + from numba import carray, cfunc, types except ImportError: pytest.skip("Numba is not installed.") return @@ -118,7 +117,7 @@ def test_index_numba_negative(ndim: int, batch_size: int): ! Requires the `numba` package to work. """ try: - from numba import cfunc, types, carray + from numba import carray, cfunc, types except ImportError: pytest.skip("Numba is not installed.") return @@ -181,8 +180,10 @@ def translated_cosine_distance(a, b): translated_matches = translated_index.search(vectors, count_queries, exact=False) for query in keys.tolist(): - normal_keys = [normal_matches[query][i].key for i in range(count_queries)] - translated_keys = [translated_matches[query][i].key for i in range(count_queries)] + # Use set equality because without a deterministic seed, ties in distance + # can be broken differently, causing ordering differences for equally-distant results + normal_keys = set(normal_matches[query][i].key for i in range(count_queries)) + translated_keys = set(translated_matches[query][i].key for i in range(count_queries)) assert normal_keys == translated_keys, f"Expected {normal_keys} == {translated_keys} for key {query}" @@ -212,16 +213,14 @@ def test_index_cppyy(ndim: int, batch_size: int): result += a[i] * b[i]; return 1 - result; } - + float inner_product_three_args(float *a, float *b, size_t n) { float result = 0; for (size_t i = 0; i != n; ++i) result += a[i] * b[i]; 
return 1 - result; } - """.replace( - "ndim", str(ndim) - ) + """.replace("ndim", str(ndim)) ) functions = [ @@ -284,26 +283,26 @@ def test_index_peachpy(ndim: int, batch_size: int): try: from peachpy import ( Argument, - ptr, - float_, const_float_, + float_, + ptr, ) from peachpy.x86_64 import ( - abi, - Function, - uarch, - isa, - GeneralPurposeRegister64, LOAD, - YMMRegister, - VSUBPS, + RETURN, VADDPS, + VFMADD231PS, VHADDPS, VMOVUPS, - VFMADD231PS, VPERM2F128, + VSUBPS, VXORPS, - RETURN, + Function, + GeneralPurposeRegister64, + YMMRegister, + abi, + isa, + uarch, ) except ImportError: pytest.skip("PeachPy is not installed.") diff --git a/python/scripts/test_sparse.py b/python/scripts/test_sparse.py index c4344c779..b63f1a689 100644 --- a/python/scripts/test_sparse.py +++ b/python/scripts/test_sparse.py @@ -18,10 +18,9 @@ # ] # /// -import pytest import numpy as np +import pytest -import usearch from usearch.index import ( Index, MetricKind, diff --git a/python/scripts/test_sqlite.py b/python/scripts/test_sqlite.py index 627319b83..cbc14655b 100644 --- a/python/scripts/test_sqlite.py +++ b/python/scripts/test_sqlite.py @@ -7,27 +7,26 @@ Usage: uv run python/scripts/test_sqlite.py - + Dependencies listed in the script header for uv to resolve automatically. 
""" # /// script # dependencies = [ # "pytest", -# "numpy", +# "numpy", # "usearch" # ] # /// -import sqlite3 import json import math +import sqlite3 import numpy as np import pytest import usearch - try: found_sqlite_path = usearch.sqlite_path() except FileNotFoundError: @@ -64,7 +63,7 @@ def test_sqlite_minimal_json_cosine_vector_search(): vector JSON NOT NULL ); INSERT INTO vectors_table (id, vector) - VALUES + VALUES (42, '[1.0, 2.0, 3.0]'), (43, '[4.0, 5.0, 6.0]'); """ @@ -73,7 +72,7 @@ def test_sqlite_minimal_json_cosine_vector_search(): # the `distance_cosine_f32` extension function cursor.execute( """ - SELECT vt.id, + SELECT vt.id, distance_cosine_f32(vt.vector, '[7.0, 8.0, 9.0]') AS distance FROM vectors_table AS vt; """ @@ -112,14 +111,14 @@ def test_sqlite_minimal_text_search(): word TEXT NOT NULL ); INSERT INTO strings_table (id, word) - VALUES + VALUES (42, '{str42}'), (43, '{str43}'); """ ) cursor.execute( f""" - SELECT st.id, + SELECT st.id, distance_levenshtein_bytes(st.word, '{str44}') AS levenshtein_bytes, distance_levenshtein_unicode(st.word, '{str44}') AS levenshtein_unicode, @@ -162,7 +161,7 @@ def test_sqlite_blob_bits_vector_search(): vector BLOB NOT NULL ); INSERT INTO binary_vectors (id, vector) - VALUES + VALUES (42, X'FFFFFF'), -- 111111111111111111111111 in binary (43, X'000000'); -- 000000000000000000000000 in binary """ @@ -172,7 +171,7 @@ def test_sqlite_blob_bits_vector_search(): # the `distance_hamming_binary` and `distance_jaccard_binary` extension functions cursor.execute( """ - SELECT bv.id, + SELECT bv.id, distance_hamming_binary(bv.vector, X'FFFF00') AS hamming_distance, distance_jaccard_binary(bv.vector, X'FFFF00') AS jaccard_distance FROM binary_vectors AS bv; @@ -253,16 +252,16 @@ def test_sqlite_distances_in_high_dimensions(num_vectors: int, ndim: int): conn.commit() similarities = """ - SELECT + SELECT a.id AS id1, b.id AS id2, distance_cosine_f32(a.vector_json, b.vector_json) AS cosine_similarity_json, 
distance_cosine_f32(a.vector_f32, b.vector_f32) AS cosine_similarity_f32, distance_cosine_f16(a.vector_f16, b.vector_f16) AS cosine_similarity_f16 - FROM + FROM vector_table AS a, vector_table AS b - WHERE + WHERE a.id < b.id; """ cursor.execute(similarities) @@ -322,16 +321,16 @@ def test_sqlite_distances_in_low_dimensions(num_vectors: int): # Query to calculate pairwise distances between vectors cursor.execute( """ - SELECT + SELECT a.id AS id1, b.id AS id2, distance_cosine_f32(a.vector_d0, a.vector_d1, a.vector_d2, a.vector_d3, b.vector_d0, b.vector_d1, b.vector_d2, b.vector_d3) AS cosine_similarity_f32, distance_cosine_f16(a.vector_d0, a.vector_d1, a.vector_d2, a.vector_d3, b.vector_d0, b.vector_d1, b.vector_d2, b.vector_d3) AS cosine_similarity_f16, distance_haversine_meters(a.vector_d0, a.vector_d1, b.vector_d0, b.vector_d1) AS haversine_meters - FROM + FROM vector_table AS a, vector_table AS b - WHERE + WHERE a.id < b.id """ ) diff --git a/python/scripts/test_tooling.py b/python/scripts/test_tooling.py index d73577bb1..ddffa800e 100644 --- a/python/scripts/test_tooling.py +++ b/python/scripts/test_tooling.py @@ -7,7 +7,7 @@ Usage: uv run python/scripts/test_tooling.py - + Dependencies listed in the script header for uv to resolve automatically. 
""" # /// script @@ -20,16 +20,12 @@ import os -import pytest import numpy as np +import pytest -import usearch -from usearch.io import load_matrix, save_matrix -from usearch.index import search from usearch.eval import random_vectors - -from usearch.index import Match, Matches, BatchMatches, Index, Indexes, kmeans - +from usearch.index import BatchMatches, Index, Indexes, Match, Matches, kmeans, search +from usearch.io import load_matrix, save_matrix dimensions = [3, 97, 256] batch_sizes = [1, 77, 100] diff --git a/python/usearch/__init__.py b/python/usearch/__init__.py index e5f559e89..c8bced53a 100644 --- a/python/usearch/__init__.py +++ b/python/usearch/__init__.py @@ -1,55 +1,63 @@ -import os -import sys import ctypes +import os import platform -import warnings +import sys import urllib.request -from typing import Optional, Tuple +import warnings from urllib.error import HTTPError -#! Load SimSIMD before the USearch compiled module -#! We can't just use the `import simsimd` as on Linux and Windows (unlike MacOS), +#! Load NumKong before the USearch compiled module +#! We can't just use the `import numkong` as on Linux and Windows (unlike MacOS), #! the symbols are not automatically loaded into the global namespace. +#! NumKong v7.5+ ships as a regular package (`numkong/__init__.py` plus a +#! `numkong/_numkong.` extension). We require v7.5+ in +#! `pyproject.toml`, so we load the compiled submodule directly. 
try: - import simsimd - - # Cross-platform check for Windows - if sys.platform == "win32": - # Add the directory where the `.dll` is located - dll_directory = os.path.dirname(simsimd.__file__) - os.add_dll_directory(dll_directory) + from numkong import _numkong as _numkong_ext - # Load SimSIMD library using `ctypes` without `RTLD_GLOBAL` - simsimd_lib = ctypes.CDLL(simsimd.__file__) + _numkong_path = _numkong_ext.__file__ + if _numkong_path is None: + raise ImportError("Could not locate NumKong compiled extension") + if sys.platform == "win32": + # On Windows, register the extension's directory so the OS loader finds + # any bundled sibling DLLs, then load without `RTLD_GLOBAL`. + os.add_dll_directory(os.path.dirname(_numkong_path)) + numkong_lib = ctypes.CDLL(_numkong_path) else: - # Non-Windows: Use `RTLD_GLOBAL` for Unix-based systems (Linux/macOS) - simsimd_lib = ctypes.CDLL(simsimd.__file__, mode=ctypes.RTLD_GLOBAL) + # On Linux/macOS we need `RTLD_GLOBAL` so USearch's compiled module can + # resolve NumKong symbols at its own load time. 
+ numkong_lib = ctypes.CDLL(_numkong_path, mode=ctypes.RTLD_GLOBAL) except ImportError: - pass # If the user doesn't want SimSIMD, we assume they know what they're doing + pass # If the user doesn't want NumKong, we assume they know what they're doing from usearch.compiled import ( - VERSION_MAJOR, - VERSION_MINOR, - VERSION_PATCH, # Default values: DEFAULT_CONNECTIVITY, DEFAULT_EXPANSION_ADD, DEFAULT_EXPANSION_SEARCH, + USES_NUMKONG, + USES_NUMKONG_DYNAMIC_DISPATCH, # Dependencies: USES_OPENMP, - USES_FP16LIB, USES_SIMSIMD, USES_SIMSIMD_DYNAMIC_DISPATCH, + VERSION_MAJOR, + VERSION_MINOR, + VERSION_PATCH, + hardware_acceleration, + hardware_acceleration_available, + # Hardware capabilities: + hardware_acceleration_compiled, ) __version__ = f"{VERSION_MAJOR}.{VERSION_MINOR}.{VERSION_PATCH}" class BinaryManager: - def __init__(self, version: Optional[str] = None): + def __init__(self, version: str | None = None): if version is None: version = __version__ self.version = version or __version__ @@ -68,11 +76,11 @@ def determine_download_dir(): @staticmethod def determine_download_url(version: str, filename: str) -> str: - base_url = "https://github.com/unum-cloud/usearch/releases/download" + base_url = "https://github.com/unum-cloud/USearch/releases/download" url = f"{base_url}/v{version}/{filename}" return url - def get_binary_name(self) -> Tuple[str, str]: + def get_binary_name(self) -> tuple[str, str]: version = self.version os_map = {"Linux": "linux", "Windows": "windows", "Darwin": "macos"} arch_map = { @@ -90,7 +98,7 @@ def get_binary_name(self) -> Tuple[str, str]: target_filename = f"usearch_sqlite.{extension}" return source_filename, target_filename - def sqlite_found_or_downloaded(self) -> Optional[str]: + def sqlite_found_or_downloaded(self) -> str | None: """ Attempts to locate the pre-installed `usearch_sqlite` binary. If not found, downloads it from GitHub. 
@@ -104,7 +112,6 @@ def sqlite_found_or_downloaded(self) -> Optional[str]: # Check local development directories first for local_dir in local_dirs: - local_path = os.path.join(local_dir, target_filename) if os.path.exists(local_path): path_wout_extension, _, _ = local_path.rpartition(".") @@ -120,7 +127,6 @@ def sqlite_found_or_downloaded(self) -> Optional[str]: download_dir = self.determine_download_dir() local_path = os.path.join(download_dir, target_filename) if not os.path.exists(local_path): - # If not found locally, warn the user and download from GitHub warnings.warn("Will download `usearch_sqlite` binary from GitHub.", UserWarning) try: diff --git a/python/usearch/client.py b/python/usearch/client.py index b8946b7ed..b9ce6b4e4 100644 --- a/python/usearch/client.py +++ b/python/usearch/client.py @@ -1,12 +1,10 @@ -from typing import Union, Optional, List - import numpy as np from ucall.client import Client from usearch.index import Matches -def _vector_to_ascii(vector: np.ndarray) -> Optional[str]: +def _vector_to_ascii(vector: np.ndarray) -> str | None: if vector.dtype != np.int8 and vector.dtype != np.uint8 and vector.dtype != np.byte: return None if not np.all((vector >= 0) | (vector <= 100)): @@ -41,14 +39,14 @@ def add_many(self, keys: np.ndarray, vectors: np.ndarray): assert keys.shape[0] == vectors.shape[0] self.client.add_many(keys=keys, vectors=vectors) - def add(self, keys: Union[np.ndarray, int], vectors: np.ndarray): + def add(self, keys: np.ndarray | int, vectors: np.ndarray): if isinstance(keys, int) or len(keys) == 1: return self.add_one(keys, vectors) else: return self.add_many(keys, vectors) def search_one(self, vector: np.ndarray, count: int) -> Matches: - matches: List[dict] = [] + matches: list[dict] = [] vector = vector.flatten() ascii_vector = _vector_to_ascii(vector) if ascii_vector: @@ -71,7 +69,7 @@ def search_one(self, vector: np.ndarray, count: int) -> Matches: def search_many(self, vectors: np.ndarray, count: int) -> Matches: 
batch_size: int = vectors.shape[0] - list_of_matches: List[List[dict]] = self.client.search_many(vectors=vectors, count=count) + list_of_matches: list[list[dict]] = self.client.search_many(vectors=vectors, count=count) keys = np.array((batch_size, count), dtype=np.uint32) distances = np.array((batch_size, count), dtype=np.float32) diff --git a/python/usearch/compiled.pyi b/python/usearch/compiled.pyi new file mode 100644 index 000000000..e4648b06d --- /dev/null +++ b/python/usearch/compiled.pyi @@ -0,0 +1,277 @@ +from __future__ import annotations + +from collections.abc import Callable + +import numpy + +# Module-level constants +DEFAULT_CONNECTIVITY: int +DEFAULT_EXPANSION_ADD: int +DEFAULT_EXPANSION_SEARCH: int +USES_OPENMP: int +USES_NUMKONG: int +USES_NUMKONG_DYNAMIC_DISPATCH: int +USES_SIMSIMD: int +USES_SIMSIMD_DYNAMIC_DISPATCH: int +VERSION_MAJOR: int +VERSION_MINOR: int +VERSION_PATCH: int + +class MetricKind(int): + Unknown: MetricKind + IP: MetricKind + Cos: MetricKind + L2sq: MetricKind + Haversine: MetricKind + Divergence: MetricKind + Pearson: MetricKind + Jaccard: MetricKind + Hamming: MetricKind + Tanimoto: MetricKind + Sorensen: MetricKind + Cosine: MetricKind # alias for Cos + InnerProduct: MetricKind # alias for IP + +class ScalarKind(int): + Unknown: ScalarKind + F64: ScalarKind + F32: ScalarKind + BF16: ScalarKind + F16: ScalarKind + E5M2: ScalarKind + E4M3: ScalarKind + E3M2: ScalarKind + E2M3: ScalarKind + I8: ScalarKind + U8: ScalarKind + B1: ScalarKind + U40: ScalarKind + UUID: ScalarKind + U64: ScalarKind + U32: ScalarKind + U16: ScalarKind + I64: ScalarKind + I32: ScalarKind + I16: ScalarKind + +class MetricSignature(int): + ArrayArray: MetricSignature + ArrayArraySize: MetricSignature + +class IndexStats: + nodes: int + edges: int + max_edges: int + allocated_bytes: int + +class Index: + def __init__( + self, + *, + ndim: int = 0, + dtype: ScalarKind = ..., + connectivity: int = ..., + expansion_add: int = ..., + expansion_search: int = 
..., + metric_kind: MetricKind = ..., + metric_signature: MetricSignature = ..., + metric_pointer: int = 0, + multi: bool = False, + enable_key_lookups: bool = True, + ) -> None: ... + def add_many( + self, + keys: numpy.ndarray, + vectors: numpy.ndarray, + *, + copy: bool = True, + threads: int = 0, + progress: Callable[[int, int], bool] | None = None, + dtype: ScalarKind = ..., + ) -> None: ... + def search_many( + self, + queries: numpy.ndarray, + count: int = 10, + exact: bool = False, + threads: int = 0, + progress: Callable[[int, int], bool] | None = None, + dtype: ScalarKind = ..., + ) -> tuple[numpy.ndarray, numpy.ndarray, numpy.ndarray, int, int]: ... + def cluster_vectors( + self, + queries: numpy.ndarray, + min_count: int = 0, + max_count: int = 0, + threads: int = 0, + progress: Callable[[int, int], bool] | None = None, + dtype: ScalarKind = ..., + ) -> tuple[numpy.ndarray, numpy.ndarray, numpy.ndarray, int, int]: ... + def cluster_keys( + self, + queries: numpy.ndarray, + min_count: int = 0, + max_count: int = 0, + threads: int = 0, + progress: Callable[[int, int], bool] | None = None, + ) -> tuple[numpy.ndarray, numpy.ndarray, numpy.ndarray, int, int]: ... + def rename_one_to_one(self, from_: int, to: int) -> bool: ... + def rename_many_to_many(self, from_: list[int], to: list[int]) -> list[bool]: ... + def rename_many_to_one(self, from_: list[int], to: int) -> list[bool]: ... + def remove_one(self, key: int, compact: bool, threads: int) -> bool: ... + def remove_many(self, key: list[int], compact: bool, threads: int) -> int: ... + def contains_one(self, key: int) -> bool: ... + def contains_many(self, keys: numpy.ndarray) -> numpy.ndarray: ... + def count_one(self, key: int) -> int: ... + def count_many(self, keys: numpy.ndarray) -> numpy.ndarray: ... + def get_many(self, keys: numpy.ndarray, dtype: ScalarKind = ...) -> object: ... + def get_keys_in_slice(self, offset: int = 0, limit: int = ...) -> numpy.ndarray: ... 
+ def get_keys_at_offsets(self, offsets: numpy.ndarray) -> numpy.ndarray: ... + def get_key_at_offset(self, offset: int) -> int: ... + def save_index_to_path( + self, + path: str, + progress: Callable[[int, int], bool] | None = None, + ) -> None: ... + def load_index_from_path( + self, + path: str, + progress: Callable[[int, int], bool] | None = None, + ) -> None: ... + def view_index_from_path( + self, + path: str, + progress: Callable[[int, int], bool] | None = None, + ) -> None: ... + def save_index_to_buffer( + self, + progress: Callable[[int, int], bool] | None = None, + ) -> bytearray: ... + def load_index_from_buffer( + self, + buffer_obj: object, + progress: Callable[[int, int], bool] | None = None, + ) -> None: ... + def view_index_from_buffer( + self, + buffer_obj: object, + progress: Callable[[int, int], bool] | None = None, + ) -> None: ... + def reset(self) -> None: ... + def clear(self) -> None: ... + def copy(self, *, copy: bool = True) -> Index: ... + def compact( + self, + threads: int, + progress: Callable[[int, int], bool] | None = None, + ) -> None: ... + def join( + self, + other: Index, + max_proposals: int = 0, + exact: bool = False, + progress: Callable[[int, int], bool] | None = None, + ) -> dict[int, int]: ... + def change_metric( + self, + metric_kind: MetricKind = ..., + metric_signature: MetricSignature = ..., + metric_pointer: int = 0, + ) -> None: ... + def pairwise_distances( + self, + left: numpy.ndarray, + right: numpy.ndarray, + ) -> numpy.ndarray: ... + def pairwise_distance(self, left: int, right: int) -> float: ... + def level_stats(self, level: int) -> IndexStats: ... + def __len__(self) -> int: ... + @property + def size(self) -> int: ... + @property + def ndim(self) -> int: ... + @property + def dtype(self) -> ScalarKind: ... + @property + def connectivity(self) -> int: ... + @property + def capacity(self) -> int: ... + @property + def multi(self) -> bool: ... + @property + def serialized_length(self) -> int: ... 
+ @property + def memory_usage(self) -> int: ... + @property + def expansion_add(self) -> int: ... + @expansion_add.setter + def expansion_add(self, value: int) -> None: ... + @property + def expansion_search(self) -> int: ... + @expansion_search.setter + def expansion_search(self, value: int) -> None: ... + @property + def hardware_acceleration(self) -> str: ... + @property + def max_level(self) -> int: ... + @property + def stats(self) -> IndexStats: ... + @property + def levels_stats(self) -> list[IndexStats]: ... + +class Indexes: + def __init__(self) -> None: ... + def __len__(self) -> int: ... + def merge(self, shard: Index) -> None: ... + def merge_paths( + self, + paths: list[str], + view: bool = True, + threads: int = 0, + ) -> None: ... + def search_many( + self, + query: numpy.ndarray, + count: int = 10, + exact: bool = False, + threads: int = 0, + progress: Callable[[int, int], bool] | None = None, + dtype: ScalarKind = ..., + ) -> tuple[numpy.ndarray, numpy.ndarray, numpy.ndarray, int, int]: ... + +def exact_search( + dataset: numpy.ndarray, + queries: numpy.ndarray, + count: int = 10, + *, + threads: int = 0, + metric_kind: MetricKind = ..., + metric_signature: MetricSignature = ..., + metric_pointer: int = 0, + progress: Callable[[int, int], bool] | None = None, + dtype: ScalarKind = ..., +) -> tuple[numpy.ndarray, numpy.ndarray, numpy.ndarray, int, int]: ... +def kmeans( + dataset: numpy.ndarray, + count: int = 10, + *, + max_iterations: int = ..., + inertia_threshold: float = ..., + max_seconds: float = ..., + min_shifts: float = ..., + seed: int = 0, + threads: int = 0, + dtype: ScalarKind = ..., + metric_kind: MetricKind = ..., + progress: Callable[[int, int], bool] | None = None, +) -> tuple[numpy.ndarray, numpy.ndarray, numpy.ndarray]: ... +def hardware_acceleration( + *, + dtype: ScalarKind = ..., + ndim: int = 0, + metric_kind: MetricKind = ..., +) -> str: ... +def hardware_acceleration_compiled() -> str: ... 
+def hardware_acceleration_available() -> str: ... +def index_dense_metadata_from_path(path: str) -> dict: ... +def index_dense_metadata_from_buffer(buffer: object) -> dict: ... diff --git a/python/usearch/eval.py b/python/usearch/eval.py index c4e95122c..576692794 100644 --- a/python/usearch/eval.py +++ b/python/usearch/eval.py @@ -1,36 +1,78 @@ from __future__ import annotations -from time import time_ns -from typing import Tuple, Any, Callable, Union, Optional, List -from dataclasses import dataclass, asdict + from collections import defaultdict +from collections.abc import Callable +from dataclasses import asdict, dataclass from math import ceil +from time import time_ns +from typing import Any import numpy as np -from usearch.io import load_matrix from usearch.index import ( - Index, BatchMatches, - ScalarKind, + Index, + Key, MetricKind, MetricKindBitwise, - Key, - _normalize_metric, + ScalarKind, _normalize_dtype, - _to_numpy_dtype, + _normalize_metric, ) +from usearch.io import load_matrix + +# Actual representable value range for each USearch scalar type. +_SCALAR_RANGE: dict[ScalarKind, tuple[float, float]] = { + ScalarKind.F64: (-1e308, 1e308), + ScalarKind.F32: (-3.4e38, 3.4e38), + ScalarKind.BF16: (-3.39e38, 3.39e38), + ScalarKind.F16: (-65504.0, 65504.0), + ScalarKind.E5M2: (-57344.0, 57344.0), + ScalarKind.E4M3: (-448.0, 448.0), + ScalarKind.E3M2: (-28.0, 28.0), + ScalarKind.E2M3: (-7.5, 7.5), + ScalarKind.I8: (-127.0, 127.0), + ScalarKind.U8: (0.0, 255.0), +} + +# Actual representable value range for numpy input types. +_INPUT_DTYPE_RANGE: dict[type, tuple[float, float]] = { + np.float64: (-1e308, 1e308), + np.float32: (-3.4e38, 3.4e38), + np.float16: (-65504.0, 65504.0), + np.int8: (-127.0, 127.0), + np.uint8: (0.0, 255.0), +} + +# Generation ceiling: we never need components larger than this. 
+_MAX_GENERATION_RANGE = 1000.0 def random_vectors( count: int, metric: MetricKind = MetricKind.IP, - dtype: ScalarKind = ScalarKind.F32, - ndim: Optional[int] = None, - index: Optional[Index] = None, + quantization: ScalarKind = ScalarKind.F32, + ndim: int | None = None, + index: Index | None = None, + input_dtype: type | None = None, + # Deprecated alias for `quantization` โ€” will be removed in v3. + dtype: ScalarKind | None = None, ) -> np.ndarray: - """Produces a collection of random vectors normalized for the provided `metric` - and matching wanted `dtype`, which can both be inferred from an existing `index`. + """Produces a collection of random vectors for the provided `metric` and `quantization`. + + For bitwise metrics: generates random packed bit vectors. + For spatial metrics: generates gaussian vectors scaled to fill the quantization + type's full dynamic range. For IP metric, additionally normalizes to unit sphere. + + :param quantization: USearch scalar type that determines the value range. + :param input_dtype: Numpy dtype for the output array. The value range is + intersected with the quantization range so values survive the cast without + collapsing. Useful when testing different input dtypes with the same index. + :param dtype: Deprecated alias for `quantization`. 
""" + # Handle deprecated `dtype` alias + if dtype is not None: + quantization = dtype # Infer default parameters from the `index`, if passed if index is not None: @@ -38,28 +80,53 @@ def random_vectors( raise ValueError("Unsupported `index` type") ndim = index.ndim - dtype = index.numpy_dtype + quantization = index.numpy_dtype metric = index.metric else: metric: MetricKind = _normalize_metric(metric) - dtype: ScalarKind = _normalize_dtype(dtype, ndim=ndim, metric=metric) + quantization: ScalarKind = _normalize_dtype(quantization, ndim=ndim, metric=metric) # Produce data - if metric in MetricKindBitwise or dtype == ScalarKind.B1: + if metric in MetricKindBitwise or quantization == ScalarKind.B1: bit_vectors = np.random.randint(2, size=(count, ndim)) - bit_vectors = np.packbits(bit_vectors, axis=1) - return bit_vectors - - else: - x = np.random.rand(count, ndim) - if _to_numpy_dtype(dtype) == np.int8: - x = (x * 100).astype(np.int8) - else: - x = x.astype(_to_numpy_dtype(dtype)) - if metric == MetricKind.IP: - return x / np.linalg.norm(x, axis=1, keepdims=True) - return x + return np.packbits(bit_vectors, axis=1) + + # Compute effective value range: start with quantization range, + # intersect with input dtype range if specified, then clamp to + # a sensible generation ceiling (we never need components of 1e38). + low, high = _SCALAR_RANGE.get(quantization, (-1.0, 1.0)) + if input_dtype is not None: + input_low, input_high = _INPUT_DTYPE_RANGE.get(input_dtype, (low, high)) + low = max(low, input_low) + high = min(high, input_high) + low = max(low, -_MAX_GENERATION_RANGE) + high = min(high, _MAX_GENERATION_RANGE) + + # Gaussian: mean = midpoint of range, stddev = range / 6 (99.7% within bounds). + mean = (low + high) / 2.0 + stddev = max((high - low) / 6.0, 1e-6) + + # Output numpy dtype: use input_dtype if specified, otherwise infer from + # the quantization type. 
Types without native numpy equivalents (bf16, FP8, FP6) + # stay as float32 โ€” USearch handles quantization internally. + _QUANT_TO_NUMPY: dict[ScalarKind, type] = { + ScalarKind.F64: np.float64, + ScalarKind.F32: np.float32, + ScalarKind.F16: np.float16, + ScalarKind.I8: np.int8, + ScalarKind.U8: np.uint8, + } + numpy_dtype = input_dtype if input_dtype is not None else _QUANT_TO_NUMPY.get(quantization, np.float32) + x = (np.random.randn(count, ndim) * stddev + mean).astype(numpy_dtype) + + # For IP metric, normalize to unit sphere + if metric == MetricKind.IP: + norms = np.linalg.norm(x.astype(np.float64), axis=1, keepdims=True) + norms = np.maximum(norms, 1e-10) + x = (x.astype(np.float64) / norms).astype(numpy_dtype) + + return x @dataclass @@ -94,7 +161,7 @@ def mean_recall(self) -> float: return self.count_matches / self.count_queries -def self_recall(index: Index, sample: Union[float, int] = 1.0, **kwargs) -> SearchStats: +def self_recall(index: Index, sample: float | int = 1.0, **kwargs) -> SearchStats: """Simplest benchmark for a quality of search, which queries every existing member of the index, to make sure approximate search finds the point itself. @@ -139,7 +206,7 @@ def self_recall(index: Index, sample: Union[float, int] = 1.0, **kwargs) -> Sear ) -def measure_seconds(f: Callable) -> Tuple[float, Any]: +def measure_seconds(f: Callable) -> tuple[float, Any]: """Simple function profiling decorator. :param f: Function to be profiled @@ -155,7 +222,7 @@ def measure_seconds(f: Callable) -> Tuple[float, Any]: return secs, result -def dcg(relevances: np.ndarray, k: Optional[int] = None) -> np.ndarray: +def dcg(relevances: np.ndarray, k: int | None = None) -> np.ndarray: """Calculate DCG (Discounted Cumulative Gain) up to position k. 
:param relevances: List of true relevance scores (in the order as they are ranked) @@ -176,7 +243,7 @@ def dcg(relevances: np.ndarray, k: Optional[int] = None) -> np.ndarray: return np.sum(relevances / discounts) -def ndcg(relevances: np.ndarray, k: Optional[int] = None) -> np.ndarray: +def ndcg(relevances: np.ndarray, k: int | None = None) -> np.ndarray: """Calculate NDCG (Normalized Discounted Cumulative Gain) at position k. :param relevances: List of true relevance scores (in the order as they are ranked) @@ -193,7 +260,7 @@ def ndcg(relevances: np.ndarray, k: Optional[int] = None) -> np.ndarray: return dcg(relevances, k) / best_dcg -def relevance(expected: np.ndarray, predicted: np.ndarray, k: Optional[int] = None) -> np.ndarray: +def relevance(expected: np.ndarray, predicted: np.ndarray, k: int | None = None) -> np.ndarray: """Calculate relevance scores. Binary relevance scores :param expected: ground-truth keys @@ -222,12 +289,12 @@ def ndim(self): @staticmethod def build( - vectors: Optional[str] = None, - queries: Optional[str] = None, - neighbors: Optional[str] = None, - count: Optional[int] = None, - ndim: Optional[int] = None, - k: Optional[int] = None, + vectors: str | None = None, + queries: str | None = None, + neighbors: str | None = None, + count: int | None = None, + ndim: int | None = None, + k: int | None = None, ): """Either loads an existing dataset from disk, or generates one on the fly. 
@@ -284,12 +351,12 @@ def build( @dataclass class TaskResult: - add_operations: Optional[int] = None - add_per_second: Optional[float] = None + add_operations: int | None = None + add_per_second: float | None = None - search_operations: Optional[int] = None - search_per_second: Optional[float] = None - recall_at_one: Optional[float] = None + search_operations: int | None = None + search_per_second: float | None = None + recall_at_one: float | None = None def __repr__(self) -> str: parts = [] @@ -366,7 +433,7 @@ def inplace_shuffle(self): self.keys = self.keys[new_order] self.vectors = self.vectors[new_order, :] - def slices(self, batch_size: int) -> List[AddTask]: + def slices(self, batch_size: int) -> list[AddTask]: """Splits this dataset into smaller chunks.""" return [ @@ -377,7 +444,7 @@ def slices(self, batch_size: int) -> List[AddTask]: for start_row in range(0, self.count, batch_size) ] - def clusters(self, number_of_clusters: int) -> List[AddTask]: + def clusters(self, number_of_clusters: int) -> list[AddTask]: """Splits this dataset into smaller chunks.""" from sklearn.cluster import KMeans @@ -414,7 +481,7 @@ def __call__(self, index: Index) -> TaskResult: recall_at_one=results.mean_recall(self.neighbors[:, 0].flatten()), ) - def slices(self, batch_size: int) -> List[SearchTask]: + def slices(self, batch_size: int) -> list[SearchTask]: """Splits this dataset into smaller chunks.""" return [ @@ -428,7 +495,7 @@ def slices(self, batch_size: int) -> List[SearchTask]: @dataclass class Evaluation: - tasks: List[Union[AddTask, SearchTask]] + tasks: list[AddTask | SearchTask] count: int ndim: int diff --git a/python/usearch/index.py b/python/usearch/index.py index 887bac223..449752f98 100644 --- a/python/usearch/index.py +++ b/python/usearch/index.py @@ -1,53 +1,62 @@ from __future__ import annotations -from inspect import signature -from collections.abc import Sequence + +import math # The purpose of this file is to provide Pythonic wrapper on top # the native 
precompiled CPython module. It improves compatibility # Python tooling, linters, and static analyzers. It also embeds JIT # into the primary `Index` class, connecting USearch with Numba. import os -import sys -import math +from collections.abc import Callable, Iterable, Sequence from dataclasses import dataclass +from inspect import signature from typing import ( Any, - Optional, - Union, NamedTuple, - List, - Iterable, - Tuple, - Dict, - Callable, + TypeAlias, + Union, ) import numpy as np from tqdm import tqdm +# Precompiled symbols that will be exposed +from usearch.compiled import ( + DEFAULT_CONNECTIVITY, + DEFAULT_EXPANSION_ADD, + DEFAULT_EXPANSION_SEARCH, + USES_NUMKONG, + USES_OPENMP, + USES_SIMSIMD, + MetricKind, + MetricSignature, + ScalarKind, +) + # Precompiled symbols that won't be exposed directly: from usearch.compiled import ( Index as _CompiledIndex, +) +from usearch.compiled import ( Indexes as _CompiledIndexes, +) +from usearch.compiled import ( IndexStats as _CompiledIndexStats, - index_dense_metadata_from_path as _index_dense_metadata_from_path, - index_dense_metadata_from_buffer as _index_dense_metadata_from_buffer, +) +from usearch.compiled import ( exact_search as _exact_search, +) +from usearch.compiled import ( hardware_acceleration as _hardware_acceleration, - kmeans as _kmeans, ) - -# Precompiled symbols that will be exposed from usearch.compiled import ( - MetricKind, - ScalarKind, - MetricSignature, - DEFAULT_CONNECTIVITY, - DEFAULT_EXPANSION_ADD, - DEFAULT_EXPANSION_SEARCH, - USES_OPENMP, - USES_SIMSIMD, - USES_FP16LIB, + index_dense_metadata_from_buffer as _index_dense_metadata_from_buffer, +) +from usearch.compiled import ( + index_dense_metadata_from_path as _index_dense_metadata_from_path, +) +from usearch.compiled import ( + kmeans as _kmeans, ) MetricKindBitwise = ( @@ -63,12 +72,6 @@ class CompiledMetric(NamedTuple): signature: MetricSignature -# Define TypeAlias for older Python versions -if sys.version_info >= (3, 10): - from 
typing import TypeAlias -else: - TypeAlias = object # Fallback for older Python versions - Key: TypeAlias = np.uint64 NoneType: TypeAlias = type(None) @@ -88,7 +91,7 @@ class CompiledMetric(NamedTuple): ProgressCallback = Callable[[int, int], bool] -def _match_signature(func: Callable[[Any], Any], arg_types: List[type], ret_type: type) -> bool: +def _match_signature(func: Callable[[Any], Any], arg_types: list[type], ret_type: type) -> bool: assert callable(func), "Not callable" sig = signature(func) param_types = [param.annotation for param in sig.parameters.values()] @@ -120,7 +123,13 @@ def _normalize_dtype( "f32": ScalarKind.F32, "bf16": ScalarKind.BF16, "f16": ScalarKind.F16, + "e5m2": ScalarKind.E5M2, + "e4m3": ScalarKind.E4M3, + "e3m2": ScalarKind.E3M2, + "e2m3": ScalarKind.E2M3, "i8": ScalarKind.I8, + "u8": ScalarKind.U8, + "uint8": ScalarKind.U8, "b1": ScalarKind.B1, "b1x8": ScalarKind.B1, "bits": ScalarKind.B1, @@ -142,10 +151,17 @@ def _to_numpy_dtype(dtype: ScalarKind): if dtype == ScalarKind.BF16: return None _normalize = { + ScalarKind.F64: np.float64, ScalarKind.F64: np.float64, ScalarKind.F32: np.float32, + ScalarKind.BF16: np.uint16, ScalarKind.F16: np.float16, + ScalarKind.E5M2: np.uint8, + ScalarKind.E4M3: np.uint8, + ScalarKind.E3M2: np.uint8, + ScalarKind.E2M3: np.uint8, ScalarKind.I8: np.int8, + ScalarKind.U8: np.uint8, ScalarKind.B1: np.uint8, } if dtype in _normalize.values(): @@ -192,10 +208,11 @@ def _search_in_compiled( compiled_callable: Callable, vectors: np.ndarray, *, - log: Union[str, bool], - progress: Optional[ProgressCallback], + log: str | bool, + progress: ProgressCallback | None, + dtype: ScalarKind | None = None, **kwargs, -) -> Union[Matches, BatchMatches]: +) -> Matches | BatchMatches: # assert isinstance(vectors, np.ndarray), "Expects a NumPy array" assert vectors.ndim == 1 or vectors.ndim == 2, "Expects a matrix or vector" @@ -207,7 +224,7 @@ def _search_in_compiled( def distill_batch( batch_matches: BatchMatches, - ) -> 
Union[BatchMatches, Matches]: + ) -> BatchMatches | Matches: return batch_matches[0] if count_vectors == 1 else batch_matches progress_callback = progress @@ -232,9 +249,11 @@ def update_progress_bar(processed: int, total: int) -> bool: progress_callback = update_progress_bar if progress_callback: - tuple_ = compiled_callable(vectors, progress=progress_callback, **kwargs) + tuple_ = compiled_callable( + vectors, progress=progress_callback, dtype=dtype if dtype is not None else ScalarKind.Unknown, **kwargs + ) else: - tuple_ = compiled_callable(vectors, **kwargs) + tuple_ = compiled_callable(vectors, dtype=dtype if dtype is not None else ScalarKind.Unknown, **kwargs) if log: progress_bar.close() @@ -249,9 +268,10 @@ def _add_to_compiled( vectors, copy: bool, threads: int, - log: Union[str, bool], - progress: Optional[ProgressCallback], -) -> Union[int, np.ndarray]: + log: str | bool, + progress: ProgressCallback | None, + dtype: ScalarKind | None = None, +) -> int | np.ndarray: # assert isinstance(vectors, np.ndarray), "Expects a NumPy array" assert not progress or _match_signature(progress, [int, int], bool), "Invalid callback" @@ -292,10 +312,18 @@ def update_progress_bar(processed: int, total: int) -> bool: copy=copy, threads=threads, progress=update_progress_bar, + dtype=dtype if dtype is not None else ScalarKind.Unknown, ) progress_bar.close() else: - compiled.add_many(keys, vectors, copy=copy, threads=threads, progress=progress) + compiled.add_many( + keys, + vectors, + copy=copy, + threads=threads, + progress=progress, + dtype=dtype if dtype is not None else ScalarKind.Unknown, + ) return keys @@ -333,7 +361,7 @@ def __getitem__(self, index: int) -> Match: else: raise IndexError(f"`index` must be an integer under {len(self)}") - def to_list(self) -> List[tuple]: + def to_list(self) -> list[tuple]: """Convert to list of (key, distance) tuples.""" return [(int(key), float(distance)) for key, distance in zip(self.keys, self.distances)] @@ -350,7 +378,7 @@ class 
BatchMatches(Sequence): Attributes: keys: 2D array of shape (n_queries, k) containing match keys - distances: 2D array of shape (n_queries, k) containing distances + distances: 2D array of shape (n_queries, k) containing distances counts: 1D array of shape (n_queries,) with actual number of matches per query visited_members: Total graph nodes visited during search computed_distances: Total distance computations performed @@ -377,17 +405,17 @@ def __getitem__(self, index: int) -> Matches: else: raise IndexError(f"`index` must be an integer under {len(self)}") - def to_list(self) -> List[List[tuple]]: + def to_list(self) -> list[list[tuple]]: """Flatten matches for all queries into a list of `(key, distance)` tuples.""" list_of_matches = [self.__getitem__(row) for row in range(self.__len__())] return [match.to_tuple() for matches in list_of_matches for match in matches] - def mean_recall(self, expected: np.ndarray, count: Optional[int] = None) -> float: + def mean_recall(self, expected: np.ndarray, count: int | None = None) -> float: """Measures recall [0, 1] as of `Matches` that contain the corresponding `expected` entry anywhere among results.""" return self.count_matches(expected, count=count) / len(expected) - def count_matches(self, expected: np.ndarray, count: Optional[int] = None) -> int: + def count_matches(self, expected: np.ndarray, count: int | None = None) -> int: """Measures recall [0, len(expected)] as of `Matches` that contain the corresponding `expected` entry anywhere among results. 
""" @@ -413,7 +441,7 @@ def __init__( self, index: Index, matches: BatchMatches, - queries: Optional[np.ndarray] = None, + queries: np.ndarray | None = None, ) -> None: if queries is None: queries = index._compiled.get_keys_in_slice() @@ -425,7 +453,7 @@ def __repr__(self) -> str: return f"usearch.Clustering(for {len(self.queries)} queries)" @property - def centroids_popularity(self) -> Tuple[np.ndarray, np.ndarray]: + def centroids_popularity(self) -> tuple[np.ndarray, np.ndarray]: return np.unique(self.matches.keys, return_counts=True) def members_of(self, centroid: Key) -> np.ndarray: @@ -472,8 +500,8 @@ def __len__(self) -> int: def __getitem__( self, - offset_offsets_or_slice: Union[int, np.ndarray, slice], - ) -> Union[Key, np.ndarray]: + offset_offsets_or_slice: int | np.ndarray | slice, + ) -> Key | np.ndarray: if isinstance(offset_offsets_or_slice, slice): start, stop, step = offset_offsets_or_slice.indices(len(self)) if step != 1: @@ -501,7 +529,7 @@ def __array__(self, dtype=None) -> np.ndarray: class Index: """Fast approximate nearest neighbor search for dense vectors. - Supports various distance metrics (cosine, euclidean, inner product, etc.) + Supports various distance metrics (cosine, euclidean, inner product, etc.) and automatic precision optimization. Vector keys must be integers. All vectors must have the same dimensionality. 
@@ -516,12 +544,12 @@ def __init__( *, # All arguments must be named ndim: int = 0, metric: MetricLike = MetricKind.Cos, - dtype: Optional[DTypeLike] = None, - connectivity: Optional[int] = None, - expansion_add: Optional[int] = None, - expansion_search: Optional[int] = None, + dtype: DTypeLike | None = None, + connectivity: int | None = None, + expansion_add: int | None = None, + expansion_search: int | None = None, multi: bool = False, - path: Optional[os.PathLike] = None, + path: os.PathLike | None = None, view: bool = False, enable_key_lookups: bool = True, ) -> None: @@ -625,7 +653,7 @@ def __init__( self.load(path) @staticmethod - def metadata(path_or_buffer: PathOrBuffer) -> Optional[dict]: + def metadata(path_or_buffer: PathOrBuffer) -> dict | None: try: if _is_buffer(path_or_buffer): return _index_dense_metadata_from_buffer(path_or_buffer) @@ -638,7 +666,7 @@ def metadata(path_or_buffer: PathOrBuffer) -> Optional[dict]: raise e @staticmethod - def restore(path_or_buffer: PathOrBuffer, view: bool = False, **kwargs) -> Optional[Index]: + def restore(path_or_buffer: PathOrBuffer, view: bool = False, **kwargs) -> Index | None: meta = Index.metadata(path_or_buffer) if not meta: return None @@ -666,9 +694,10 @@ def add( *, copy: bool = True, threads: int = 0, - log: Union[str, bool] = False, - progress: Optional[ProgressCallback] = None, - ) -> Union[int, np.ndarray]: + log: str | bool = False, + progress: ProgressCallback | None = None, + dtype: DTypeLike | None = None, + ) -> int | np.ndarray: """Inserts one or move vectors into the index. 
For maximal performance the `keys` and `vectors` @@ -706,6 +735,7 @@ def add( threads=threads, log=log, progress=progress, + dtype=_normalize_dtype(dtype) if dtype is not None else None, ) def search( @@ -716,16 +746,17 @@ def search( *, threads: int = 0, exact: bool = False, - log: Union[str, bool] = False, - progress: Optional[ProgressCallback] = None, - ) -> Union[Matches, BatchMatches]: + log: str | bool = False, + progress: ProgressCallback | None = None, + dtype: DTypeLike | None = None, + ) -> Matches | BatchMatches: """Performs approximate nearest neighbors search for one or more queries. - + When searching with batch queries, returns BatchMatches that pre-allocates arrays for the requested `count` size. If fewer matches exist than requested (e.g., when count > index size), use individual query access via batch_matches[i] to get only valid results, or check batch_matches.counts to see actual result counts per query. - + :param vectors: Query vector or vectors. :type vectors: VectorOrVectorsLike :param count: Upper count on the number of matches to find @@ -756,18 +787,19 @@ def search( exact=exact, threads=threads, progress=progress, + dtype=_normalize_dtype(dtype) if dtype is not None else None, ) - def contains(self, keys: KeyOrKeysLike) -> Union[bool, np.ndarray]: + def contains(self, keys: KeyOrKeysLike) -> bool | np.ndarray: if isinstance(keys, Iterable): return self._compiled.contains_many(np.array(keys, dtype=Key)) else: return self._compiled.contains_one(int(keys)) - def __contains__(self, keys: KeyOrKeysLike) -> Union[bool, np.ndarray]: + def __contains__(self, keys: KeyOrKeysLike) -> bool | np.ndarray: return self.contains(keys) - def count(self, keys: KeyOrKeysLike) -> Union[int, np.ndarray]: + def count(self, keys: KeyOrKeysLike) -> int | np.ndarray: if isinstance(keys, Iterable): return self._compiled.count_many(np.array(keys, dtype=Key)) else: @@ -776,8 +808,8 @@ def count(self, keys: KeyOrKeysLike) -> Union[int, np.ndarray]: def get( self, 
keys: KeyOrKeysLike, - dtype: Optional[DTypeLike] = None, - ) -> Union[Optional[np.ndarray], Tuple[Optional[np.ndarray]]]: + dtype: DTypeLike | None = None, + ) -> np.ndarray | None | tuple[np.ndarray | None]: """Looks up one or more keys from the `Index`, retrieving corresponding vectors. Returns `None`, if one key is requested, and its not present. @@ -819,7 +851,7 @@ def cast(result): results = cast(results) if isinstance(results, np.ndarray) else [cast(result) for result in results] return results[0] if is_one else results - def __getitem__(self, keys: KeyOrKeysLike) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]: + def __getitem__(self, keys: KeyOrKeysLike) -> np.ndarray | tuple[np.ndarray, np.ndarray]: """Looks up one or more keys from the `Index`, retrieving corresponding vectors. Returns `None`, if one key is requested, and its not present. @@ -840,7 +872,7 @@ def remove( *, compact: bool = False, threads: int = 0, - ) -> Union[int, np.ndarray]: + ) -> int | np.ndarray: """Removes one or move vectors from the index. When working with extremely large indexes, you may want to @@ -862,14 +894,14 @@ def remove( keys = np.array(keys, dtype=Key) return self._compiled.remove_many(keys, compact=compact, threads=threads) - def __delitem__(self, keys: KeyOrKeysLike) -> Union[int, np.ndarray]: + def __delitem__(self, keys: KeyOrKeysLike) -> int | np.ndarray: return self.remove(keys) def rename( self, from_: KeyOrKeysLike, to: KeyOrKeysLike, - ) -> Union[int, np.ndarray]: + ) -> int | np.ndarray: """Rename existing member vector or vectors. May be used in iterative clustering procedures, where one would iteratively @@ -944,7 +976,7 @@ def serialized_length(self) -> int: return self._compiled.serialized_length @property - def metric_kind(self) -> Union[MetricKind, CompiledMetric]: + def metric_kind(self) -> MetricKind | CompiledMetric: """Returns the type of metric used for distance calculations. :return: The metric kind used in the index. 
@@ -953,7 +985,7 @@ def metric_kind(self) -> Union[MetricKind, CompiledMetric]: return self._metric_jit.kind if self._metric_jit else self._metric_kind @property - def metric(self) -> Union[MetricKind, CompiledMetric]: + def metric(self) -> MetricKind | CompiledMetric: """Returns the metric object used for distance calculations. :return: The metric used in the index. @@ -1069,9 +1101,9 @@ def expansion_search(self, v: int): def save( self, - path_or_buffer: Union[str, os.PathLike, NoneType] = None, - progress: Optional[ProgressCallback] = None, - ) -> Optional[bytes]: + path_or_buffer: str | os.PathLike | NoneType = None, + progress: ProgressCallback | None = None, + ) -> bytes | None: """Saves the index to a file or buffer. If `path_or_buffer` is not provided, it defaults to the path stored in `self.path`. @@ -1093,8 +1125,8 @@ def save( def load( self, - path_or_buffer: Union[PathOrBuffer, NoneType] = None, - progress: Optional[ProgressCallback] = None, + path_or_buffer: PathOrBuffer | NoneType = None, + progress: ProgressCallback | None = None, ): """Loads the index from a file or buffer. @@ -1123,8 +1155,8 @@ def load( def view( self, - path_or_buffer: Union[PathOrBuffer, NoneType] = None, - progress: Optional[ProgressCallback] = None, + path_or_buffer: PathOrBuffer | NoneType = None, + progress: ProgressCallback | None = None, ): """Maps the index from a file or buffer without loading it into memory. @@ -1183,8 +1215,8 @@ def join( other: Index, max_proposals: int = 0, exact: bool = False, - progress: Optional[ProgressCallback] = None, - ) -> Dict[Key, Key]: + progress: ProgressCallback | None = None, + ) -> dict[Key, Key]: """Performs "Semantic Join" or pairwise matching between `self` & `other` index. Is different from `search`, as no collisions are allowed in resulting pairs. 
Uses the concept of "Stable Marriages" from Combinatorics, famous for the 2012 @@ -1213,13 +1245,14 @@ def join( def cluster( self, *, - vectors: Optional[np.ndarray] = None, - keys: Optional[np.ndarray] = None, - min_count: Optional[int] = None, - max_count: Optional[int] = None, + vectors: np.ndarray | None = None, + keys: np.ndarray | None = None, + min_count: int | None = None, + max_count: int | None = None, threads: int = 0, - log: Union[str, bool] = False, - progress: Optional[ProgressCallback] = None, + log: str | bool = False, + progress: ProgressCallback | None = None, + dtype: DTypeLike | None = None, ) -> Clustering: """ Clusters already indexed or provided `vectors`, mapping them to various centroids. @@ -1253,6 +1286,7 @@ def cluster( max_count=max_count, threads=threads, progress=progress, + dtype=_normalize_dtype(dtype) if dtype is not None else ScalarKind.Unknown, ) else: if keys is None: @@ -1271,7 +1305,7 @@ def cluster( batch_matches = BatchMatches(*results) return Clustering(self, batch_matches, keys) - def pairwise_distance(self, left: KeyOrKeysLike, right: KeyOrKeysLike) -> Union[np.ndarray, float]: + def pairwise_distance(self, left: KeyOrKeysLike, right: KeyOrKeysLike) -> np.ndarray | float: """Computes the pairwise distance between keys or key arrays. If `left` and `right` are single keys, returns the distance between them. @@ -1354,7 +1388,7 @@ def stats(self) -> _CompiledIndexStats: return self._compiled.stats @property - def levels_stats(self) -> List[_CompiledIndexStats]: + def levels_stats(self) -> list[_CompiledIndexStats]: """Get the accumulated statistics for each level of the graph. :return: List of statistics for each level of the graph. @@ -1385,7 +1419,7 @@ def level_stats(self, level: int) -> _CompiledIndexStats: return self._compiled.level_stats(level) @property - def specs(self) -> Dict[str, Union[str, int, bool]]: + def specs(self) -> dict[str, str | int | bool]: """Returns the specifications of the index. 
:return: Dictionary of index specifications. @@ -1407,8 +1441,8 @@ def specs(self) -> Dict[str, Union[str, int, bool]]: "dtype": self.dtype, "path": self.path, "compiled_with_openmp": USES_OPENMP, + "compiled_with_numkong": USES_NUMKONG, "compiled_with_simsimd": USES_SIMSIMD, - "compiled_with_native_f16": USES_FP16LIB, } def __repr__(self) -> str: @@ -1458,8 +1492,7 @@ def __repr_pretty__(self) -> str: f"-- expansion on search: {self.expansion_search} candidates", "- binary", f"-- uses OpenMP: {USES_OPENMP}", - f"-- uses SimSIMD: {USES_SIMSIMD}", - f"-- supports half-precision: {USES_FP16LIB}", + f"-- uses NumKong: {USES_NUMKONG}", f"-- uses hardware acceleration: {self.hardware_acceleration}", "- state", f"-- size: {self.size:,} vectors", @@ -1510,7 +1543,8 @@ def search( *, threads: int = 0, exact: bool = False, - progress: Optional[ProgressCallback] = None, + progress: ProgressCallback | None = None, + dtype: DTypeLike | None = None, ): return _search_in_compiled( self._compiled.search_many, @@ -1522,6 +1556,7 @@ def search( exact=exact, threads=threads, progress=progress, + dtype=_normalize_dtype(dtype) if dtype is not None else None, ) @@ -1533,9 +1568,10 @@ def search( *, exact: bool = False, threads: int = 0, - log: Union[str, bool] = False, - progress: Optional[ProgressCallback] = None, -) -> Union[Matches, BatchMatches]: + log: str | bool = False, + progress: ProgressCallback | None = None, + dtype: DTypeLike | None = None, +) -> Matches | BatchMatches: """Shortcut for search, that can avoid index construction. Particularly useful for tiny datasets, where brute-force exact search works fast enough. 
@@ -1567,6 +1603,8 @@ def search( assert not progress or _match_signature(progress, [int, int], bool), "Invalid callback signature" assert dataset.ndim == 2, "Dataset must be a matrix, with a vector in each row" + scalar_kind = _normalize_dtype(dtype) if dtype is not None else ScalarKind.Unknown + if not exact: index = Index( ndim=dataset.shape[1], @@ -1579,6 +1617,7 @@ def search( threads=threads, log=log, progress=progress, + dtype=scalar_kind, ) return index.search( query, @@ -1586,6 +1625,7 @@ def search( threads=threads, log=log, progress=progress, + dtype=scalar_kind, ) metric = _normalize_metric(metric) @@ -1623,6 +1663,7 @@ def search_batch(query, **kwargs): count=count, threads=threads, progress=progress, + dtype=scalar_kind, ) @@ -1635,8 +1676,8 @@ def kmeans( inertia_threshold: float = 1e-4, max_seconds: float = 60.0, min_shifts: float = 0.01, - seed: Optional[int] = None, -) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: + seed: int | None = None, +) -> tuple[np.ndarray, np.ndarray, np.ndarray]: """ Performs KMeans clustering on a dataset using the USearch library with mixed-precision support. diff --git a/python/usearch/io.py b/python/usearch/io.py index 417e9d36e..51e325583 100644 --- a/python/usearch/io.py +++ b/python/usearch/io.py @@ -1,6 +1,5 @@ import os import struct -import typing import numpy as np @@ -21,7 +20,7 @@ def numpy_scalar_size(dtype) -> int: }[dtype] -def guess_numpy_dtype_from_filename(filename) -> typing.Optional[type]: +def guess_numpy_dtype_from_filename(filename) -> type | None: if filename.endswith(".fbin"): return np.float32 elif filename.endswith(".dbin"): @@ -47,8 +46,8 @@ def load_matrix( start_row: int = 0, count_rows: int = None, view: bool = False, - dtype: typing.Optional[type] = None, -) -> typing.Optional[np.ndarray]: + dtype: type | None = None, +) -> np.ndarray | None: """Read *.ibin, *.bbib, *.hbin, *.fbin, *.dbin, *.i8bin, *.i32bin files with matrices. 
:param filename: path to the matrix file @@ -70,18 +69,22 @@ def load_matrix( with open(filename, "rb") as f: rows, cols = np.fromfile(f, count=2, dtype=np.int32).astype(np.uint64) - + # Validate file size matches expected data size f.seek(0, 2) # Go to end file_size = f.tell() expected_size = 8 + (rows * cols * scalar_size) # Header + data - + if file_size != expected_size: if file_size < expected_size: - raise ValueError(f"File {filename} is truncated. Expected {expected_size:,} bytes, got {file_size:,} bytes") + raise ValueError( + f"File {filename} is truncated. Expected {expected_size:,} bytes, got {file_size:,} bytes" + ) else: - raise ValueError(f"File {filename} is larger than expected. Expected {expected_size:,} bytes, got {file_size:,} bytes") - + raise ValueError( + f"File {filename} is larger than expected. Expected {expected_size:,} bytes, got {file_size:,} bytes" + ) + f.seek(8) # Back to start of data rows = (rows - start_row) if count_rows is None else count_rows row_offset = start_row * scalar_size * cols diff --git a/python/usearch/numba.py b/python/usearch/numba.py index 3c1a4e61c..62f8ecc14 100644 --- a/python/usearch/numba.py +++ b/python/usearch/numba.py @@ -4,7 +4,7 @@ # into the primary `Index` class, connecting USearch with Numba. 
from math import sqrt -from usearch.index import MetricKind, ScalarKind, MetricSignature, CompiledMetric +from usearch.index import CompiledMetric, MetricKind, MetricSignature, ScalarKind def jit( @@ -25,7 +25,7 @@ def jit( assert isinstance(metric, MetricKind) assert isinstance(dtype, ScalarKind) - from numba import cfunc, types, carray + from numba import carray, cfunc, types signature_i8args = types.float32(types.CPointer(types.int8), types.CPointer(types.int8)) signature_f16args = types.float32(types.CPointer(types.float16), types.CPointer(types.float16)) diff --git a/python/usearch/server.py b/python/usearch/server.py index 8f93ff6ac..46bb6a466 100644 --- a/python/usearch/server.py +++ b/python/usearch/server.py @@ -1,13 +1,12 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- -import os import argparse -import numpy as np -from typing import List +import os +import numpy as np from ucall.rich_posix import Server -from usearch.index import Index, Matches, Key + +from usearch.index import Index, Key, Matches def _ascii_to_vector(string: str) -> np.ndarray: @@ -70,14 +69,14 @@ def add_many(keys: np.ndarray, vectors: np.ndarray): index.add(keys, vectors, threads=threads) @server - def search_one(vector: np.ndarray, count: int) -> List[dict]: + def search_one(vector: np.ndarray, count: int) -> list[dict]: print("search", vector, count) vectors = vector.reshape(vector.shape[0], 1) results: Matches = index.search(vectors, count) return results.to_list() @server - def search_many(vectors: np.ndarray, count: int) -> List[List[dict]]: + def search_many(vectors: np.ndarray, count: int) -> list[list[dict]]: results: Matches = index.search(vectors, count) return results.to_list() diff --git a/rust/README.md b/rust/README.md index 99cf7478c..866ec9b5c 100644 --- a/rust/README.md +++ b/rust/README.md @@ -15,7 +15,7 @@ This will add a USearch dependency to your `Cargo.toml` file. usearch = "..." 
``` -By default, [SimSIMD](https://github.com/ashvardanian/simsimd) is used to provide dynamic dispatch for SIMD operations. +By default, [NumKong](https://github.com/ashvardanian/numkong) is used to provide dynamic dispatch for SIMD operations. You can, however, override that by specifying custom features in your `Cargo.toml` file. To disable all features, use the following configuration: @@ -28,7 +28,7 @@ To enable specific features, use the following configuration: ```toml [dependencies] -usearch = { version = "...", features = ["simsimd", "openmp", "fp16lib"] } +usearch = { version = "...", features = ["numkong", "openmp", "fp16lib"] } ``` OpenMP (`openmp`) will use the OpenMP runtime for parallelism. @@ -91,7 +91,7 @@ assert!(index.view_from_buffer(&serialization_buffer).is_ok()); ## Metrics -USearch comes pre-packaged with SimSIMD, bringing over 100 SIMD-accelerated distance kernels for x86 and ARM architectures. +USearch comes pre-packaged with NumKong, bringing over 100 SIMD-accelerated distance kernels for x86 and ARM architectures. That includes: - `MetricKind::IP` - Inner Product metric, defined as `IP = 1 - sum(a[i] * b[i])`. @@ -111,7 +111,7 @@ To use a custom metric with USearch, define a function that matches the expected Let's say you are implementing a weighted distance function to search through joint embeddings of images and textual descriptions of some products in a catalog, taking some [UForm](https://github.com/unum-cloud/uform) or CLIP-like models. 
```rust -use simsimd::SpatialSimilarity; +use numkong::SpatialSimilarity; let image_dimensions: usize = 768; let text_dimensions: usize = 512; diff --git a/rust/lib.cpp b/rust/lib.cpp index 40daef090..58837f36f 100644 --- a/rust/lib.cpp +++ b/rust/lib.cpp @@ -10,6 +10,9 @@ using search_result_t = typename index_t::search_result_t; using labeling_result_t = typename index_t::labeling_result_t; using vector_key_t = typename index_dense_t::vector_key_t; +char const* hardware_acceleration_compiled() { return unum::usearch::hardware_acceleration_compiled(); } +char const* hardware_acceleration_available() { return unum::usearch::hardware_acceleration_available(); } + metric_kind_t rust_to_cpp_metric(MetricKind value) { switch (value) { case MetricKind::IP: return metric_kind_t::ip_k; @@ -27,11 +30,16 @@ metric_kind_t rust_to_cpp_metric(MetricKind value) { scalar_kind_t rust_to_cpp_scalar(ScalarKind value) { switch (value) { - case ScalarKind::I8: return scalar_kind_t::i8_k; + case ScalarKind::F64: return scalar_kind_t::f64_k; + case ScalarKind::F32: return scalar_kind_t::f32_k; case ScalarKind::BF16: return scalar_kind_t::bf16_k; case ScalarKind::F16: return scalar_kind_t::f16_k; - case ScalarKind::F32: return scalar_kind_t::f32_k; - case ScalarKind::F64: return scalar_kind_t::f64_k; + case ScalarKind::E5M2: return scalar_kind_t::e5m2_k; + case ScalarKind::E4M3: return scalar_kind_t::e4m3_k; + case ScalarKind::E3M2: return scalar_kind_t::e3m2_k; + case ScalarKind::E2M3: return scalar_kind_t::e2m3_k; + case ScalarKind::I8: return scalar_kind_t::i8_k; + case ScalarKind::U8: return scalar_kind_t::u8_k; case ScalarKind::B1: return scalar_kind_t::b1x8_k; default: return scalar_kind_t::unknown_k; } @@ -91,38 +99,43 @@ auto make_predicate(uptr_t metric, uptr_t metric_state) { } // clang-format off -void NativeIndex::add_b1x8(vector_key_t key, rust::Slice vec) const { add_(*index_, key, (b1x8_t const*)vec.data(), vec.size()); } -void NativeIndex::add_i8(vector_key_t key, 
rust::Slice vec) const { add_(*index_, key, vec.data(), vec.size()); } -void NativeIndex::add_f16(vector_key_t key, rust::Slice vec) const { add_(*index_, key, (f16_t const*)vec.data(), vec.size()); } -void NativeIndex::add_f32(vector_key_t key, rust::Slice vec) const { add_(*index_, key, vec.data(), vec.size()); } void NativeIndex::add_f64(vector_key_t key, rust::Slice vec) const { add_(*index_, key, vec.data(), vec.size()); } +void NativeIndex::add_f32(vector_key_t key, rust::Slice vec) const { add_(*index_, key, vec.data(), vec.size()); } +void NativeIndex::add_f16(vector_key_t key, rust::Slice vec) const { add_(*index_, key, (f16_t const*)vec.data(), vec.size()); } +void NativeIndex::add_i8(vector_key_t key, rust::Slice vec) const { add_(*index_, key, vec.data(), vec.size()); } +void NativeIndex::add_u8(vector_key_t key, rust::Slice vec) const { add_(*index_, key, (u8_t const*)vec.data(), vec.size()); } +void NativeIndex::add_b1x8(vector_key_t key, rust::Slice vec) const { add_(*index_, key, (b1x8_t const*)vec.data(), vec.size()); } // Regular approximate search -Matches NativeIndex::search_b1x8(rust::Slice vec, size_t count) const { return search_(*index_, (b1x8_t const*)vec.data(), vec.size(), count, false); } -Matches NativeIndex::search_i8(rust::Slice vec, size_t count) const { return search_(*index_, vec.data(), vec.size(), count, false); } -Matches NativeIndex::search_f16(rust::Slice vec, size_t count) const { return search_(*index_, (f16_t const*)vec.data(), vec.size(), count, false); } -Matches NativeIndex::search_f32(rust::Slice vec, size_t count) const { return search_(*index_, vec.data(), vec.size(), count, false); } Matches NativeIndex::search_f64(rust::Slice vec, size_t count) const { return search_(*index_, vec.data(), vec.size(), count, false); } +Matches NativeIndex::search_f32(rust::Slice vec, size_t count) const { return search_(*index_, vec.data(), vec.size(), count, false); } +Matches NativeIndex::search_f16(rust::Slice vec, size_t count) 
const { return search_(*index_, (f16_t const*)vec.data(), vec.size(), count, false); } +Matches NativeIndex::search_i8(rust::Slice vec, size_t count) const { return search_(*index_, vec.data(), vec.size(), count, false); } +Matches NativeIndex::search_u8(rust::Slice vec, size_t count) const { return search_(*index_, (u8_t const*)vec.data(), vec.size(), count, false); } +Matches NativeIndex::search_b1x8(rust::Slice vec, size_t count) const { return search_(*index_, (b1x8_t const*)vec.data(), vec.size(), count, false); } // Exact (brute force) search -Matches NativeIndex::exact_search_b1x8(rust::Slice vec, size_t count) const { return search_(*index_, (b1x8_t const*)vec.data(), vec.size(), count, true); } -Matches NativeIndex::exact_search_i8(rust::Slice vec, size_t count) const { return search_(*index_, vec.data(), vec.size(), count, true); } -Matches NativeIndex::exact_search_f16(rust::Slice vec, size_t count) const { return search_(*index_, (f16_t const*)vec.data(), vec.size(), count, true); } -Matches NativeIndex::exact_search_f32(rust::Slice vec, size_t count) const { return search_(*index_, vec.data(), vec.size(), count, true); } Matches NativeIndex::exact_search_f64(rust::Slice vec, size_t count) const { return search_(*index_, vec.data(), vec.size(), count, true); } +Matches NativeIndex::exact_search_f32(rust::Slice vec, size_t count) const { return search_(*index_, vec.data(), vec.size(), count, true); } +Matches NativeIndex::exact_search_f16(rust::Slice vec, size_t count) const { return search_(*index_, (f16_t const*)vec.data(), vec.size(), count, true); } +Matches NativeIndex::exact_search_i8(rust::Slice vec, size_t count) const { return search_(*index_, vec.data(), vec.size(), count, true); } +Matches NativeIndex::exact_search_u8(rust::Slice vec, size_t count) const { return search_(*index_, (u8_t const*)vec.data(), vec.size(), count, true); } +Matches NativeIndex::exact_search_b1x8(rust::Slice vec, size_t count) const { return search_(*index_, (b1x8_t 
const*)vec.data(), vec.size(), count, true); } // Filtered search (always approximate) -Matches NativeIndex::filtered_search_b1x8(rust::Slice vec, size_t count, uptr_t metric, uptr_t metric_state) const { return filtered_search_(*index_, (b1x8_t const*)vec.data(), vec.size(), count, make_predicate(metric, metric_state)); } -Matches NativeIndex::filtered_search_i8(rust::Slice vec, size_t count, uptr_t metric, uptr_t metric_state) const { return filtered_search_(*index_, vec.data(), vec.size(), count, make_predicate(metric, metric_state)); } -Matches NativeIndex::filtered_search_f16(rust::Slice vec, size_t count, uptr_t metric, uptr_t metric_state) const { return filtered_search_(*index_, (f16_t const*)vec.data(), vec.size(), count, make_predicate(metric, metric_state)); } -Matches NativeIndex::filtered_search_f32(rust::Slice vec, size_t count, uptr_t metric, uptr_t metric_state) const { return filtered_search_(*index_, vec.data(), vec.size(), count, make_predicate(metric, metric_state)); } Matches NativeIndex::filtered_search_f64(rust::Slice vec, size_t count, uptr_t metric, uptr_t metric_state) const { return filtered_search_(*index_, vec.data(), vec.size(), count, make_predicate(metric, metric_state)); } +Matches NativeIndex::filtered_search_f32(rust::Slice vec, size_t count, uptr_t metric, uptr_t metric_state) const { return filtered_search_(*index_, vec.data(), vec.size(), count, make_predicate(metric, metric_state)); } +Matches NativeIndex::filtered_search_f16(rust::Slice vec, size_t count, uptr_t metric, uptr_t metric_state) const { return filtered_search_(*index_, (f16_t const*)vec.data(), vec.size(), count, make_predicate(metric, metric_state)); } +Matches NativeIndex::filtered_search_i8(rust::Slice vec, size_t count, uptr_t metric, uptr_t metric_state) const { return filtered_search_(*index_, vec.data(), vec.size(), count, make_predicate(metric, metric_state)); } +Matches NativeIndex::filtered_search_u8(rust::Slice vec, size_t count, uptr_t metric, uptr_t 
metric_state) const { return filtered_search_(*index_, (u8_t const*)vec.data(), vec.size(), count, make_predicate(metric, metric_state)); } +Matches NativeIndex::filtered_search_b1x8(rust::Slice vec, size_t count, uptr_t metric, uptr_t metric_state) const { return filtered_search_(*index_, (b1x8_t const*)vec.data(), vec.size(), count, make_predicate(metric, metric_state)); } -size_t NativeIndex::get_b1x8(vector_key_t key, rust::Slice vec) const { if (vec.size() % dimensions()) throw std::invalid_argument("Vector length must match index dimensionality"); return index_->get(key, (b1x8_t*)vec.data(), vec.size() / dimensions()); } -size_t NativeIndex::get_i8(vector_key_t key, rust::Slice vec) const { if (vec.size() % dimensions()) throw std::invalid_argument("Vector length must match index dimensionality"); return index_->get(key, vec.data(), vec.size() / dimensions()); } -size_t NativeIndex::get_f16(vector_key_t key, rust::Slice vec) const { if (vec.size() % dimensions()) throw std::invalid_argument("Vector length must match index dimensionality"); return index_->get(key, (f16_t*)vec.data(), vec.size() / dimensions()); } -size_t NativeIndex::get_f32(vector_key_t key, rust::Slice vec) const { if (vec.size() % dimensions()) throw std::invalid_argument("Vector length must match index dimensionality"); return index_->get(key, vec.data(), vec.size() / dimensions()); } size_t NativeIndex::get_f64(vector_key_t key, rust::Slice vec) const { if (vec.size() % dimensions()) throw std::invalid_argument("Vector length must match index dimensionality"); return index_->get(key, vec.data(), vec.size() / dimensions()); } +size_t NativeIndex::get_f32(vector_key_t key, rust::Slice vec) const { if (vec.size() % dimensions()) throw std::invalid_argument("Vector length must match index dimensionality"); return index_->get(key, vec.data(), vec.size() / dimensions()); } +size_t NativeIndex::get_f16(vector_key_t key, rust::Slice vec) const { if (vec.size() % dimensions()) throw 
std::invalid_argument("Vector length must match index dimensionality"); return index_->get(key, (f16_t*)vec.data(), vec.size() / dimensions()); } +size_t NativeIndex::get_i8(vector_key_t key, rust::Slice vec) const { if (vec.size() % dimensions()) throw std::invalid_argument("Vector length must match index dimensionality"); return index_->get(key, vec.data(), vec.size() / dimensions()); } +size_t NativeIndex::get_u8(vector_key_t key, rust::Slice vec) const { if (vec.size() % dimensions()) throw std::invalid_argument("Vector length must match index dimensionality"); return index_->get(key, (u8_t*)vec.data(), vec.size() / dimensions()); } +size_t NativeIndex::get_b1x8(vector_key_t key, rust::Slice vec) const { if (vec.size() % dimensions()) throw std::invalid_argument("Vector length must match index dimensionality"); return index_->get(key, (b1x8_t*)vec.data(), vec.size() / dimensions()); } // clang-format on size_t NativeIndex::expansion_add() const { return index_->expansion_add(); } @@ -180,6 +193,19 @@ void NativeIndex::view(rust::Str path) const { void NativeIndex::reset() const { index_->reset(); } size_t NativeIndex::memory_usage() const { return index_->memory_usage(); } + +MemoryStats NativeIndex::memory_stats() const { + auto stats = index_->memory_stats(); + MemoryStats result; + result.graph_allocated = stats.graph_allocated; + result.graph_wasted = stats.graph_wasted; + result.graph_reserved = stats.graph_reserved; + result.vectors_allocated = stats.vectors_allocated; + result.vectors_wasted = stats.vectors_wasted; + result.vectors_reserved = stats.vectors_reserved; + return result; +} + char const* NativeIndex::hardware_acceleration() const { return index_->metric().isa_name(); } void NativeIndex::save_to_buffer(rust::Slice buffer) const { @@ -211,8 +237,10 @@ std::unique_ptr new_native_index(IndexOptions const& options) { index_dense_config_t config(options.connectivity, options.expansion_add, options.expansion_search); config.multi = options.multi; 
index_t index = index_t::make(metric, config); - // In Rust we have the luxury of returning a `Result` type even for the constructor. - // So let's pre-reserve the maximal number of threads and return the error if it fails. - index.reserve(index_limits_t{}); - return wrap(std::move(index)); + + // Preserve constructor pre-allocation semantics (`index_limits_t{}`), but execute + // reserve after heap allocation to avoid move-induced pointer invalidation. + std::unique_ptr native = wrap(std::move(index)); + native->reserve_capacity_and_threads(0, std::thread::hardware_concurrency()); + return native; } diff --git a/rust/lib.hpp b/rust/lib.hpp index 42fec8b85..4482971f5 100644 --- a/rust/lib.hpp +++ b/rust/lib.hpp @@ -4,6 +4,7 @@ // We don't have to forward declare all of those: struct Matches; struct IndexOptions; +struct MemoryStats; enum class MetricKind; enum class ScalarKind; @@ -29,18 +30,21 @@ class NativeIndex { void add_b1x8(vector_key_t key, rust::Slice vector) const; void add_i8(vector_key_t key, rust::Slice vector) const; + void add_u8(vector_key_t key, rust::Slice vector) const; void add_f16(vector_key_t key, rust::Slice vector) const; void add_f32(vector_key_t key, rust::Slice vector) const; void add_f64(vector_key_t key, rust::Slice vector) const; Matches search_b1x8(rust::Slice query, size_t count) const; Matches search_i8(rust::Slice query, size_t count) const; + Matches search_u8(rust::Slice query, size_t count) const; Matches search_f16(rust::Slice query, size_t count) const; Matches search_f32(rust::Slice query, size_t count) const; Matches search_f64(rust::Slice query, size_t count) const; Matches exact_search_b1x8(rust::Slice query, size_t count) const; Matches exact_search_i8(rust::Slice query, size_t count) const; + Matches exact_search_u8(rust::Slice query, size_t count) const; Matches exact_search_f16(rust::Slice query, size_t count) const; Matches exact_search_f32(rust::Slice query, size_t count) const; Matches exact_search_f64(rust::Slice 
query, size_t count) const; @@ -48,6 +52,7 @@ class NativeIndex { // clang-format off Matches filtered_search_b1x8(rust::Slice query, size_t count, uptr_t filter_function, uptr_t filter_state) const; Matches filtered_search_i8(rust::Slice query, size_t count, uptr_t filter_function, uptr_t filter_state) const; + Matches filtered_search_u8(rust::Slice query, size_t count, uptr_t filter_function, uptr_t filter_state) const; Matches filtered_search_f16(rust::Slice query, size_t count, uptr_t filter_function, uptr_t filter_state) const; Matches filtered_search_f32(rust::Slice query, size_t count, uptr_t filter_function, uptr_t filter_state) const; Matches filtered_search_f64(rust::Slice query, size_t count, uptr_t filter_function, uptr_t filter_state) const; @@ -55,6 +60,7 @@ class NativeIndex { size_t get_b1x8(vector_key_t key, rust::Slice vector) const; size_t get_i8(vector_key_t key, rust::Slice vector) const; + size_t get_u8(vector_key_t key, rust::Slice vector) const; size_t get_f16(vector_key_t key, rust::Slice vector) const; size_t get_f32(vector_key_t key, rust::Slice vector) const; size_t get_f64(vector_key_t key, rust::Slice vector) const; @@ -83,6 +89,7 @@ class NativeIndex { void view(rust::Str path) const; void reset() const; size_t memory_usage() const; + MemoryStats memory_stats() const; char const* hardware_acceleration() const; void save_to_buffer(rust::Slice buffer) const; @@ -94,3 +101,6 @@ class NativeIndex { }; std::unique_ptr new_native_index(IndexOptions const& options); + +char const* hardware_acceleration_compiled(); +char const* hardware_acceleration_available(); diff --git a/rust/lib.rs b/rust/lib.rs index 3b97b830d..098055251 100644 --- a/rust/lib.rs +++ b/rust/lib.rs @@ -9,7 +9,7 @@ //! ## Features //! //! - SIMD-accelerated distance calculations for various metrics. -//! - Support for `f32`, `f64`, `i8`, custom `f16`, and binary (`b1x8`) vector types. +//! 
- Support for `f32`, `f64`, `i8`, `u8`, custom `f16`, and binary (`b1x8`) vector types. //! - Extensible with custom distance metrics and filtering predicates. //! - Efficient serialization and deserialization for persistence and network transfers. //! @@ -22,6 +22,26 @@ pub fn version() -> &'static str { env!("CARGO_PKG_VERSION") } +/// Returns a comma-separated list of ISAs compiled into this binary. +pub fn hardware_acceleration_compiled() -> String { + use core::ffi::CStr; + unsafe { + CStr::from_ptr(ffi::hardware_acceleration_compiled()) + .to_string_lossy() + .into_owned() + } +} + +/// Returns a comma-separated list of ISAs available at runtime (compiled AND supported by CPU). +pub fn hardware_acceleration_available() -> String { + use core::ffi::CStr; + unsafe { + CStr::from_ptr(ffi::hardware_acceleration_available()) + .to_string_lossy() + .into_owned() + } +} + /// The key type used to identify vectors in the index. /// It is a 64-bit unsigned integer. pub type Key = u64; @@ -258,7 +278,7 @@ pub mod ffi { IP, /// The squared Euclidean Distance metric, defined as `L2 = sum((a[i] - b[i])^2)`. L2sq, - /// The Cosine Similarity metric, defined as `Cos = 1 - sum(a[i] * b[i]) / (sqrt(sum(a[i]^2) * sqrt(sum(b[i]^2)))`. + /// The Cosine Distance metric, defined as `Cos = 1 - sum(a[i] * b[i]) / (sqrt(sum(a[i]^2)) * sqrt(sum(b[i]^2)))`. Cos, /// The Pearson Correlation metric. Pearson, @@ -283,12 +303,22 @@ pub mod ffi { F64, /// 32-bit single-precision IEEE 754 floating-point number. F32, - /// 16-bit half-precision IEEE 754 floating-point number (different from `bf16`). - F16, /// 16-bit brain floating-point number. BF16, + /// 16-bit half-precision IEEE 754 floating-point number (different from `bf16`). + F16, + /// 8-bit floating point: 1 sign + 5 exponent + 2 mantissa. + E5M2, + /// 8-bit floating point: 1 sign + 4 exponent + 3 mantissa. + E4M3, + /// 8-bit floating point: 1 sign + 3 exponent + 2 mantissa, range +/-28. 
+ E3M2, + /// 8-bit floating point: 1 sign + 2 exponent + 3 mantissa, range +/-7.5. + E2M3, /// 8-bit signed integer. I8, + /// 8-bit unsigned integer. + U8, /// 1-bit binary value, packed 8 per byte. B1, } @@ -301,6 +331,24 @@ pub mod ffi { distances: Vec, } + /// Detailed memory statistics with separate breakdowns for the graph + /// and vectors allocator tapes. + #[derive(Debug, Clone, Copy, PartialEq, Eq)] + struct MemoryStats { + /// Total memory allocated by the graph structure allocator, in bytes. + graph_allocated: usize, + /// Memory wasted due to alignment in the graph allocator, in bytes. + graph_wasted: usize, + /// Reserved but unused memory in the graph allocator, in bytes. + graph_reserved: usize, + /// Total memory allocated by the vectors data allocator, in bytes. + vectors_allocated: usize, + /// Memory wasted due to alignment in the vectors allocator, in bytes. + vectors_wasted: usize, + /// Reserved but unused memory in the vectors allocator, in bytes. + vectors_reserved: usize, + } + /// The index options used to configure the dense index during creation. /// It contains the number of dimensions, the metric kind, the scalar kind, /// the connectivity, the expansion values, and the multi-flag. 
@@ -319,6 +367,9 @@ pub mod ffi { unsafe extern "C++" { include!("lib.hpp"); + pub fn hardware_acceleration_compiled() -> *const c_char; + pub fn hardware_acceleration_available() -> *const c_char; + /// Low-level C++ interface that is further wrapped into the high-level `Index` type NativeIndex; @@ -349,38 +400,41 @@ pub mod ffi { pub fn capacity(self: &NativeIndex) -> usize; pub fn serialized_length(self: &NativeIndex) -> usize; - pub fn add_b1x8(self: &NativeIndex, key: u64, vector: &[u8]) -> Result<()>; - pub fn add_i8(self: &NativeIndex, key: u64, vector: &[i8]) -> Result<()>; - pub fn add_f16(self: &NativeIndex, key: u64, vector: &[i16]) -> Result<()>; - pub fn add_f32(self: &NativeIndex, key: u64, vector: &[f32]) -> Result<()>; pub fn add_f64(self: &NativeIndex, key: u64, vector: &[f64]) -> Result<()>; + pub fn add_f32(self: &NativeIndex, key: u64, vector: &[f32]) -> Result<()>; + pub fn add_f16(self: &NativeIndex, key: u64, vector: &[i16]) -> Result<()>; + pub fn add_i8(self: &NativeIndex, key: u64, vector: &[i8]) -> Result<()>; + pub fn add_u8(self: &NativeIndex, key: u64, vector: &[u8]) -> Result<()>; + pub fn add_b1x8(self: &NativeIndex, key: u64, vector: &[u8]) -> Result<()>; - pub fn search_b1x8(self: &NativeIndex, query: &[u8], count: usize) -> Result; - pub fn search_i8(self: &NativeIndex, query: &[i8], count: usize) -> Result; - pub fn search_f16(self: &NativeIndex, query: &[i16], count: usize) -> Result; - pub fn search_f32(self: &NativeIndex, query: &[f32], count: usize) -> Result; pub fn search_f64(self: &NativeIndex, query: &[f64], count: usize) -> Result; + pub fn search_f32(self: &NativeIndex, query: &[f32], count: usize) -> Result; + pub fn search_f16(self: &NativeIndex, query: &[i16], count: usize) -> Result; + pub fn search_i8(self: &NativeIndex, query: &[i8], count: usize) -> Result; + pub fn search_u8(self: &NativeIndex, query: &[u8], count: usize) -> Result; + pub fn search_b1x8(self: &NativeIndex, query: &[u8], count: usize) -> Result; 
- pub fn exact_search_b1x8(self: &NativeIndex, query: &[u8], count: usize) - -> Result; - pub fn exact_search_i8(self: &NativeIndex, query: &[i8], count: usize) -> Result; - pub fn exact_search_f16(self: &NativeIndex, query: &[i16], count: usize) + pub fn exact_search_f64(self: &NativeIndex, query: &[f64], count: usize) -> Result; pub fn exact_search_f32(self: &NativeIndex, query: &[f32], count: usize) -> Result; - pub fn exact_search_f64(self: &NativeIndex, query: &[f64], count: usize) + pub fn exact_search_f16(self: &NativeIndex, query: &[i16], count: usize) + -> Result; + pub fn exact_search_i8(self: &NativeIndex, query: &[i8], count: usize) -> Result; + pub fn exact_search_u8(self: &NativeIndex, query: &[u8], count: usize) -> Result; + pub fn exact_search_b1x8(self: &NativeIndex, query: &[u8], count: usize) -> Result; - pub fn filtered_search_b1x8( + pub fn filtered_search_f64( self: &NativeIndex, - query: &[u8], + query: &[f64], count: usize, filter: usize, filter_state: usize, ) -> Result; - pub fn filtered_search_i8( + pub fn filtered_search_f32( self: &NativeIndex, - query: &[i8], + query: &[f32], count: usize, filter: usize, filter_state: usize, @@ -392,26 +446,34 @@ pub mod ffi { filter: usize, filter_state: usize, ) -> Result; - pub fn filtered_search_f32( + pub fn filtered_search_i8( self: &NativeIndex, - query: &[f32], + query: &[i8], count: usize, filter: usize, filter_state: usize, ) -> Result; - pub fn filtered_search_f64( + pub fn filtered_search_u8( self: &NativeIndex, - query: &[f64], + query: &[u8], + count: usize, + filter: usize, + filter_state: usize, + ) -> Result; + pub fn filtered_search_b1x8( + self: &NativeIndex, + query: &[u8], count: usize, filter: usize, filter_state: usize, ) -> Result; - pub fn get_b1x8(self: &NativeIndex, key: u64, buffer: &mut [u8]) -> Result; - pub fn get_i8(self: &NativeIndex, key: u64, buffer: &mut [i8]) -> Result; - pub fn get_f16(self: &NativeIndex, key: u64, buffer: &mut [i16]) -> Result; - pub fn 
get_f32(self: &NativeIndex, key: u64, buffer: &mut [f32]) -> Result; pub fn get_f64(self: &NativeIndex, key: u64, buffer: &mut [f64]) -> Result; + pub fn get_f32(self: &NativeIndex, key: u64, buffer: &mut [f32]) -> Result; + pub fn get_f16(self: &NativeIndex, key: u64, buffer: &mut [i16]) -> Result; + pub fn get_i8(self: &NativeIndex, key: u64, buffer: &mut [i8]) -> Result; + pub fn get_u8(self: &NativeIndex, key: u64, buffer: &mut [u8]) -> Result; + pub fn get_b1x8(self: &NativeIndex, key: u64, buffer: &mut [u8]) -> Result; pub fn remove(self: &NativeIndex, key: u64) -> Result; pub fn rename(self: &NativeIndex, from: u64, to: u64) -> Result; @@ -423,6 +485,7 @@ pub mod ffi { pub fn view(self: &NativeIndex, path: &str) -> Result<()>; pub fn reset(self: &NativeIndex) -> Result<()>; pub fn memory_usage(self: &NativeIndex) -> usize; + pub fn memory_stats(self: &NativeIndex) -> MemoryStats; pub fn hardware_acceleration(self: &NativeIndex) -> *const c_char; pub fn save_to_buffer(self: &NativeIndex, buffer: &mut [u8]) -> Result<()>; @@ -432,7 +495,7 @@ pub mod ffi { } // Re-export the FFI structs and enums at the crate root for easy access -pub use ffi::{IndexOptions, MetricKind, ScalarKind}; +pub use ffi::{IndexOptions, MemoryStats, MetricKind, ScalarKind}; /// Represents custom metric functions for calculating distances between vectors in various formats. /// @@ -446,6 +509,7 @@ pub use ffi::{IndexOptions, MetricKind, ScalarKind}; /// /// - `B1X8Metric`: A metric function for binary vectors packed in `u8` containers, represented here by `b1x8`. /// - `I8Metric`: A metric function for vectors of 8-bit signed integers (`i8`). +/// - `U8Metric`: A metric function for vectors of 8-bit unsigned integers (`u8`). /// - `F16Metric`: A metric function for vectors of 16-bit half-precision floating-point numbers (`f16`). /// - `F32Metric`: A metric function for vectors of 32-bit floating-point numbers (`f32`). 
/// - `F64Metric`: A metric function for vectors of 64-bit floating-point numbers (`f64`). @@ -484,6 +548,7 @@ pub use ffi::{IndexOptions, MetricKind, ScalarKind}; pub enum MetricFunction { B1X8Metric(*mut std::boxed::Box Distance + Send + Sync>), I8Metric(*mut std::boxed::Box Distance + Send + Sync>), + U8Metric(*mut std::boxed::Box Distance + Send + Sync>), F16Metric(*mut std::boxed::Box Distance + Send + Sync>), F32Metric(*mut std::boxed::Box Distance + Send + Sync>), F64Metric(*mut std::boxed::Box Distance + Send + Sync>), @@ -543,6 +608,9 @@ impl Drop for Index { MetricFunction::I8Metric(pointer) => unsafe { drop(Box::from_raw(*pointer)); }, + MetricFunction::U8Metric(pointer) => unsafe { + drop(Box::from_raw(*pointer)); + }, MetricFunction::F16Metric(pointer) => unsafe { drop(Box::from_raw(*pointer)); }, @@ -843,6 +911,73 @@ impl VectorType for i8 { } } +impl VectorType for u8 { + fn search(index: &Index, query: &[Self], count: usize) -> Result { + index.inner.search_u8(query, count) + } + + fn exact_search( + index: &Index, + query: &[Self], + count: usize, + ) -> Result { + index.inner.exact_search_u8(query, count) + } + + fn get(index: &Index, key: Key, vector: &mut [Self]) -> Result { + index.inner.get_u8(key, vector) + } + + fn add(index: &Index, key: Key, vector: &[Self]) -> Result<(), cxx::Exception> { + index.inner.add_u8(key, vector) + } + + fn filtered_search( + index: &Index, + query: &[Self], + count: usize, + filter: F, + ) -> Result + where + Self: Sized, + F: Fn(Key) -> bool, + { + extern "C" fn trampoline bool>(key: u64, closure_address: usize) -> bool { + let closure = closure_address as *const F; + unsafe { (*closure)(key) } + } + + let trampoline_fn: usize = trampoline:: as *const () as usize; + let closure_address: usize = &filter as *const F as usize; + index + .inner + .filtered_search_u8(query, count, trampoline_fn, closure_address) + } + fn change_metric( + index: &mut Index, + metric: std::boxed::Box Distance + Send + Sync>, + ) -> 
Result<(), cxx::Exception> { + type MetricFn = Box Distance>; + index.metric_fn = Some(MetricFunction::U8Metric(Box::into_raw(Box::new(metric)))); + + extern "C" fn trampoline(first: usize, second: usize, closure_address: usize) -> Distance { + let first_ptr = first as *const u8; + let second_ptr = second as *const u8; + let closure: *mut MetricFn = closure_address as *mut MetricFn; + unsafe { (*closure)(first_ptr, second_ptr) } + } + + let trampoline_fn: usize = trampoline as *const () as usize; + let closure_address = match index.metric_fn { + Some(MetricFunction::U8Metric(metric)) => metric as *mut () as usize, + _ => panic!("Expected U8Metric"), + }; + index.inner.change_metric(trampoline_fn, closure_address); + + Ok(()) + } +} + impl VectorType for f64 { fn search(index: &Index, query: &[Self], count: usize) -> Result { index.inner.search_f64(query, count) @@ -957,12 +1092,9 @@ impl VectorType for f16 { // Temporarily cast the closure to a raw pointer for passing. let trampoline_fn: usize = trampoline:: as *const () as usize; let closure_address: usize = &filter as *const F as usize; - index.inner.filtered_search_f16( - f16::to_i16s(query), - count, - trampoline_fn, - closure_address, - ) + index + .inner + .filtered_search_f16(f16::to_i16s(query), count, trampoline_fn, closure_address) } fn change_metric( @@ -1035,12 +1167,9 @@ impl VectorType for b1x8 { // Temporarily cast the closure to a raw pointer for passing. let trampoline_fn: usize = trampoline:: as *const () as usize; let closure_address: usize = &filter as *const F as usize; - index.inner.filtered_search_b1x8( - b1x8::to_u8s(query), - count, - trampoline_fn, - closure_address, - ) + index + .inner + .filtered_search_b1x8(b1x8::to_u8s(query), count, trampoline_fn, closure_address) } fn change_metric( @@ -1285,55 +1414,67 @@ impl Index { self.inner.serialized_length() } - /// Removes the vector associated with the given key from the index. 
+ /// Removes all vectors associated with the given key from the index. + /// In a multi-index, a single key may map to several vectors; this removes all of them. /// /// # Arguments /// - /// * `key` - The key of the vector to be removed. + /// * `key` - The key of the vector(s) to be removed. /// /// # Returns /// - /// `true` if the vector is successfully removed, `false` otherwise. + /// The number of vectors that were removed. Zero when the key is absent. + /// + /// # Example + /// + /// ```rust,ignore + /// index.add(42, &vec)?; + /// assert_eq!(index.remove(42)?, 1); + /// assert_eq!(index.remove(42)?, 0); // already gone + /// ``` pub fn remove(self: &Index, key: Key) -> Result { self.inner.remove(key) } - /// Renames the vector under a specific key. + /// Reassigns every vector stored under `from` to the new key `to`. + /// The original key is freed and subsequent lookups should use `to`. /// /// # Arguments /// - /// * `from` - The key of the vector to be renamed. - /// * `to` - The new name. + /// * `from` - The current key. + /// * `to` - The key that will replace it. /// /// # Returns /// - /// `true` if the vector is renamed, `false` otherwise. + /// The number of vectors that were reassigned. Zero when `from` is absent. + /// + /// # Example + /// + /// ```rust,ignore + /// index.add(1, &vec)?; + /// assert_eq!(index.rename(1, 2)?, 1); + /// assert!(!index.contains(1)); + /// assert!(index.contains(2)); + /// ``` pub fn rename(self: &Index, from: Key, to: Key) -> Result { self.inner.rename(from, to) } - /// Checks if the index contains a vector with a specified key. + /// Checks whether at least one vector with the given key exists in the index. /// /// # Arguments /// - /// * `key` - The key to be checked. - /// - /// # Returns - /// - /// `true` if the index contains the vector with the given key, `false` otherwise. + /// * `key` - The key to look up. 
pub fn contains(self: &Index, key: Key) -> bool { self.inner.contains(key) } - /// Count the count of vectors with the same specified key. + /// Returns the number of vectors stored under the given key. + /// Always 0 or 1 for a unique index; may be greater than 1 when `multi` is enabled. /// /// # Arguments /// - /// * `key` - The key to be checked. - /// - /// # Returns - /// - /// Number of vectors found. + /// * `key` - The key to look up. pub fn count(self: &Index, key: Key) -> usize { self.inner.count(key) } @@ -1376,6 +1517,12 @@ impl Index { self.inner.memory_usage() } + /// Returns detailed memory statistics with separate breakdowns for the graph + /// and vectors allocator tapes. + pub fn memory_stats(self: &Index) -> ffi::MemoryStats { + self.inner.memory_stats() + } + /// Saves the index to a specified file. /// /// # Arguments @@ -1453,7 +1600,7 @@ mod tests { env::var("RUST_VERSION").unwrap_or_else(|_| "unknown".into()) ); - // Create indexes with different configurations + // Create indexes with different configurations, ordered by descending dynamic range let f64_index = Index::new(&IndexOptions { dimensions: 256, metric: MetricKind::Cos, @@ -1470,6 +1617,14 @@ mod tests { }) .unwrap(); + let bf16_index = Index::new(&IndexOptions { + dimensions: 256, + metric: MetricKind::Cos, + quantization: ScalarKind::BF16, + ..Default::default() + }) + .unwrap(); + let f16_index = Index::new(&IndexOptions { dimensions: 256, metric: MetricKind::Cos, @@ -1478,6 +1633,22 @@ mod tests { }) .unwrap(); + let e5m2_index = Index::new(&IndexOptions { + dimensions: 256, + metric: MetricKind::Cos, + quantization: ScalarKind::E5M2, + ..Default::default() + }) + .unwrap(); + + let e4m3_index = Index::new(&IndexOptions { + dimensions: 256, + metric: MetricKind::Cos, + quantization: ScalarKind::E4M3, + ..Default::default() + }) + .unwrap(); + let i8_index = Index::new(&IndexOptions { dimensions: 256, metric: MetricKind::Cos, @@ -1486,6 +1657,14 @@ mod tests { }) .unwrap(); + 
let u8_index = Index::new(&IndexOptions { + dimensions: 256, + metric: MetricKind::Cos, + quantization: ScalarKind::U8, + ..Default::default() + }) + .unwrap(); + let b1_index = Index::new(&IndexOptions { dimensions: 256, metric: MetricKind::Hamming, @@ -1502,14 +1681,30 @@ mod tests { "f32 hardware acceleration: {}", f32_index.hardware_acceleration() ); + println!( + "bf16 hardware acceleration: {}", + bf16_index.hardware_acceleration() + ); println!( "f16 hardware acceleration: {}", f16_index.hardware_acceleration() ); + println!( + "e5m2 hardware acceleration: {}", + e5m2_index.hardware_acceleration() + ); + println!( + "e4m3 hardware acceleration: {}", + e4m3_index.hardware_acceleration() + ); println!( "i8 hardware acceleration: {}", i8_index.hardware_acceleration() ); + println!( + "u8 hardware acceleration: {}", + u8_index.hardware_acceleration() + ); println!( "b1 hardware acceleration: {}", b1_index.hardware_acceleration() @@ -1518,7 +1713,44 @@ mod tests { } #[test] - fn test_add_get_vector() { + fn new_index_does_not_preallocate_members() { + let options = IndexOptions { + dimensions: 8, + quantization: ScalarKind::F32, + ..Default::default() + }; + let index = Index::new(&options).unwrap(); + + // Regression check: constructor should preserve `index_limits_t{}` behavior + // and avoid reserving member slots up front. 
+ assert_eq!(index.capacity(), 0); + } + + #[test] + fn index_survives_box_and_arc_moves_after_construction() { + let options = IndexOptions { + dimensions: 4, + quantization: ScalarKind::F32, + ..Default::default() + }; + let vector = [0.25f32, 0.5, 0.75, 1.0]; + + let boxed = Box::new(Index::new(&options).unwrap()); + boxed.reserve(8).unwrap(); + boxed.add(7, &vector).unwrap(); + let boxed_matches = boxed.search(&vector, 1).unwrap(); + assert_eq!(boxed_matches.keys.first().copied(), Some(7)); + + let arc = std::sync::Arc::new(Index::new(&options).unwrap()); + let moved_arc = std::sync::Arc::clone(&arc); + moved_arc.reserve_capacity_and_threads(8, 2).unwrap(); + moved_arc.add(9, &vector).unwrap(); + let arc_matches = arc.search(&vector, 1).unwrap(); + assert_eq!(arc_matches.keys.first().copied(), Some(9)); + } + + #[test] + fn add_get_vector() { let options = IndexOptions { dimensions: 5, quantization: ScalarKind::F32, @@ -1553,8 +1785,9 @@ mod tests { let result = index.get(1, &mut found); assert!(result.is_err()); } + #[test] - fn test_search_vector() { + fn search_vector() { let options = IndexOptions { dimensions: 5, quantization: ScalarKind::F32, @@ -1567,18 +1800,24 @@ mod tests { let second: [f32; 5] = [0.3, 0.2, 0.4, 0.0, 0.1]; let too_long: [f32; 6] = [0.3, 0.2, 0.4, 0.0, 0.1, 0.1]; let too_short: [f32; 4] = [0.3, 0.2, 0.4, 0.0]; + + // Search on empty index should return zero results + let empty_results = index.search(&first, 10).unwrap(); + assert_eq!(empty_results.keys.len(), 0); + assert!(index.add(1, &first).is_ok()); assert!(index.add(2, &second).is_ok()); assert_eq!(index.size(), 2); - //assert!(index.add(3, &too_long).is_err()); - //assert!(index.add(4, &too_short).is_err()); + // Vectors that were not added - shouldn't be visible! 
+ // assert!(index.add(3, &too_long).is_err()); + // assert!(index.add(4, &too_short).is_err()); assert!(index.search(&too_long, 1).is_err()); assert!(index.search(&too_short, 1).is_err()); } #[test] - fn test_add_remove_vector() { + fn add_remove_vector() { let options = IndexOptions { dimensions: 4, metric: MetricKind::IP, @@ -1601,15 +1840,25 @@ mod tests { let id3 = 483367403120624232; let id4 = 483367403120624233; + // Add and verify contains/count + assert!(!index.contains(id1)); + assert_eq!(index.count(id1), 0); assert!(index.add(id1, &first).is_ok()); - let mut found_slice = [0.0f32; 4]; - assert_eq!(index.get(id1, &mut found_slice).unwrap(), 1); - assert!(index.remove(id1).is_ok()); + assert!(index.contains(id1)); + assert_eq!(index.count(id1), 1); - assert!(index.add(id2, &second).is_ok()); + // Rename id1 โ†’ id2 and verify the move + assert_eq!(index.rename(id1, id2).unwrap(), 1); + assert!(!index.contains(id1)); + assert!(index.contains(id2)); let mut found_slice = [0.0f32; 4]; assert_eq!(index.get(id2, &mut found_slice).unwrap(), 1); + assert_eq!(found_slice, first); + + // Remove and verify assert!(index.remove(id2).is_ok()); + assert!(!index.contains(id2)); + assert_eq!(index.count(id2), 0); assert!(index.add(id3, &second).is_ok()); let mut found_slice = [0.0f32; 4]; @@ -1682,9 +1931,17 @@ mod tests { assert_eq!(results.keys.len(), 2); println!("--------------------------------------------------"); - // Validate serialization + let stats = index.memory_stats(); + assert!( + stats.vectors_allocated > 0, + "vectors should have allocated memory" + ); + + // Validate serialization with round-trip content checks assert!(index.save("index.rust.usearch").is_ok()); assert!(index.load("index.rust.usearch").is_ok()); + let results = index.search(&first, 10).unwrap(); + assert!(results.keys.contains(&42), "key 42 survives save/load"); assert!(index.view("index.rust.usearch").is_ok()); // Make sure every function is called at least once @@ -1698,6 +1955,7 @@ 
mod tests { options.dimensions = 2; assert!(new_index(&options).is_ok()); + // Buffer serialization with round-trip content checks let mut serialization_buffer = vec![0; index.serialized_length()]; assert!(index.save_to_buffer(&mut serialization_buffer).is_ok()); @@ -1706,14 +1964,40 @@ mod tests { .load_from_buffer(&serialization_buffer) .is_ok()); assert_eq!(index.size(), deserialized_index.size()); + let results = deserialized_index.search(&first, 10).unwrap(); + assert!( + results.keys.contains(&42), + "key 42 survives buffer round-trip" + ); + + // Borrow the buffer as a read-only view instead of deserializing + let viewed_index = Index::new(&IndexOptions { + dimensions: 5, + ..Default::default() + }) + .unwrap(); + assert!(unsafe { viewed_index.view_from_buffer(&serialization_buffer) }.is_ok()); + assert_eq!(viewed_index.size(), index.size()); + let results = viewed_index.search(&first, 10).unwrap(); + assert!( + results.keys.contains(&42), + "key 42 visible via view_from_buffer" + ); - // reset + // After a full reset the index must be reusable from scratch assert_ne!(index.memory_usage(), 0); assert!(index.reset().is_ok()); assert_eq!(index.size(), 0); assert_eq!(index.memory_usage(), 0); - // clone + assert!(index.reserve(10).is_ok()); + assert!(index.add(100, &first).is_ok()); + assert!(index.add(101, &second).is_ok()); + assert_eq!(index.size(), 2); + let results = index.search(&first, 10).unwrap(); + assert_eq!(results.keys.len(), 2); + + // Clone options.metric = MetricKind::Haversine; let mut opts = options.clone(); assert_eq!(opts.metric, options.metric); @@ -1725,7 +2009,7 @@ mod tests { } #[test] - fn test_search_with_stateless_filter() { + fn search_with_stateless_filter() { let options = IndexOptions { dimensions: 5, ..Default::default() @@ -1750,7 +2034,7 @@ mod tests { } #[test] - fn test_search_with_stateful_filter() { + fn search_with_stateful_filter() { use std::collections::HashSet; let options = IndexOptions { @@ -1781,7 +2065,7 @@ mod 
tests { } #[test] - fn test_zero_distances() { + fn zero_distances() { let options = IndexOptions { dimensions: 8, metric: MetricKind::L2sq, @@ -1811,7 +2095,7 @@ mod tests { } #[test] - fn test_exact_search() { + fn exact_search() { use std::collections::HashSet; // Create an index with many vectors @@ -1866,7 +2150,7 @@ mod tests { } #[test] - fn test_change_distance_function() { + fn change_distance_function() { let options = IndexOptions { dimensions: 2, // Adjusted for simplicity in creating test vectors ..Default::default() @@ -1894,7 +2178,7 @@ mod tests { } #[test] - fn test_binary_vectors_and_hamming_distance() { + fn binary_vectors_and_hamming_distance() { let index = Index::new(&IndexOptions { dimensions: 8, metric: MetricKind::Hamming, @@ -1924,9 +2208,47 @@ mod tests { } #[test] - fn test_concurrency() { + fn multi_index() { + let options = IndexOptions { + dimensions: 4, + metric: MetricKind::L2sq, + quantization: ScalarKind::F32, + multi: true, + ..Default::default() + }; + let index = Index::new(&options).unwrap(); + index.reserve(10).unwrap(); + + let vec_a: [f32; 4] = [1.0, 0.0, 0.0, 0.0]; + let vec_b: [f32; 4] = [0.0, 1.0, 0.0, 0.0]; + let key: Key = 42; + + // Two vectors under the same key + index.add(key, &vec_a).unwrap(); + index.add(key, &vec_b).unwrap(); + assert_eq!(index.size(), 2); + assert_eq!(index.count(key), 2); + assert!(index.contains(key)); + + // Retrieve both vectors + let mut buf = [0.0f32; 8]; // 2 * dims + let found = index.get(key, &mut buf).unwrap(); + assert_eq!(found, 2); + + // Export convenience + let mut exported: Vec = Vec::new(); + assert_eq!(index.export(key, &mut exported).unwrap(), 2); + assert_eq!(exported.len(), 8); + + // Search should find the key + let results = index.search(&vec_a, 5).unwrap(); + assert!(results.keys.contains(&key)); + } + + #[test] + fn concurrency() { use fork_union as fu; - use rand::{Rng, SeedableRng}; + use rand::{RngExt, SeedableRng}; use rand_chacha::ChaCha8Rng; use 
rand_distr::Uniform; use std::sync::Arc; diff --git a/setup.py b/setup.py index 929c8b528..88b32c74f 100644 --- a/setup.py +++ b/setup.py @@ -1,10 +1,10 @@ import os -import sys -import subprocess import platform -from setuptools import setup +import subprocess +import sys from pybind11.setup_helpers import Pybind11Extension +from setuptools import setup compile_args = [] link_args = [] @@ -41,44 +41,60 @@ def get_bool_env_w_name(name: str, preference: bool) -> tuple: pass -# ? Is there a way we can bring back SimSIMD on Windows? -# ? Using `ctypes.CDLL(simsimd.__file__)` breaks the CI +# ? Is there a way we can bring back NumKong on Windows? +# ? Using `ctypes.CDLL(numkong.__file__)` breaks the CI # ? with "Windows fatal exception: access violation". -prefer_simsimd: bool = not is_windows -prefer_fp16lib: bool = True +prefer_numkong: bool = not is_windows prefer_openmp: bool = is_linux and is_gcc -use_simsimd: bool = get_bool_env("USEARCH_USE_SIMSIMD", prefer_simsimd) -use_fp16lib: bool = get_bool_env("USEARCH_USE_FP16LIB", prefer_fp16lib) +use_numkong: bool = get_bool_env("USEARCH_USE_NUMKONG", prefer_numkong) use_openmp: bool = get_bool_env("USEARCH_USE_OPENMP", prefer_openmp) # Common arguments for all platforms macros_args.append(("USEARCH_USE_OPENMP", "1" if use_openmp else "0")) -macros_args.append(("USEARCH_USE_FP16LIB", "1" if use_fp16lib else "0")) -macros_args.append(("USEARCH_USE_SIMSIMD", "1" if use_simsimd else "0")) +macros_args.append(("USEARCH_USE_NUMKONG", "1" if use_numkong else "0")) -#! Unlike OpenMP and FP16LIB, the SimSIMD is integrated differently. -#! It will anyways use dynamic dispatch, and will not build the library as part of `usearch` package. -#! It relies on the fact that SimSIMD ships it's own bindings for most platforms, and the user should +#! NumKong uses dynamic dispatch, and will not build the library as part of `usearch` package. +#! 
It relies on the fact that NumKong ships its own bindings for most platforms, and the user should #! install it separately! macros_args.extend( [ - ("SIMSIMD_DYNAMIC_DISPATCH", "1" if use_simsimd else "0"), - ("SIMSIMD_TARGET_NEON", "0"), # ? Hide-out all complex intrinsics - ("SIMSIMD_TARGET_NEON_BF16", "0"), # ? Hide-out all complex intrinsics - ("SIMSIMD_TARGET_NEON_F16", "0"), # ? Hide-out all complex intrinsics - ("SIMSIMD_TARGET_NEON_I8", "0"), # ? Hide-out all complex intrinsics - ("SIMSIMD_TARGET_SVE", "0"), # ? Hide-out all complex intrinsics - ("SIMSIMD_TARGET_SVE_BF16", "0"), # ? Hide-out all complex intrinsics - ("SIMSIMD_TARGET_SVE_F16", "0"), # ? Hide-out all complex intrinsics - ("SIMSIMD_TARGET_SVE_I8", "0"), # ? Hide-out all complex intrinsics - ("SIMSIMD_TARGET_SVE2", "0"), # ? Hide-out all complex intrinsics - ("SIMSIMD_TARGET_HASWELL", "0"), # ? Hide-out all complex intrinsics - ("SIMSIMD_TARGET_SKYLAKE", "0"), # ? Hide-out all complex intrinsics - ("SIMSIMD_TARGET_ICE", "0"), # ? Hide-out all complex intrinsics - ("SIMSIMD_TARGET_SAPPHIRE", "0"), # ? Hide-out all complex intrinsics - ("SIMSIMD_TARGET_GENOA", "0"), # ? Hide-out all complex intrinsics + ("NK_DYNAMIC_DISPATCH", "1" if use_numkong else "0"), + ("NK_TARGET_NEON", "0"), # ? Hide-out all complex intrinsics + ("NK_TARGET_NEONBFDOT", "0"), # ? Hide-out all complex intrinsics + ("NK_TARGET_NEONHALF", "0"), # ? Hide-out all complex intrinsics + ("NK_TARGET_NEONSDOT", "0"), # ? Hide-out all complex intrinsics + ("NK_TARGET_SVE", "0"), # ? Hide-out all complex intrinsics + ("NK_TARGET_SVEBFDOT", "0"), # ? Hide-out all complex intrinsics + ("NK_TARGET_SVEHALF", "0"), # ? Hide-out all complex intrinsics + ("NK_TARGET_SVESDOT", "0"), # ? Hide-out all complex intrinsics + ("NK_TARGET_SVE2", "0"), # ? Hide-out all complex intrinsics + ("NK_TARGET_HASWELL", "0"), # ? Hide-out all complex intrinsics + ("NK_TARGET_SKYLAKE", "0"), # ? Hide-out all complex intrinsics + ("NK_TARGET_ICELAKE", "0"), # ? 
Hide-out all complex intrinsics + ("NK_TARGET_SAPPHIRE", "0"), # ? Hide-out all complex intrinsics + ("NK_TARGET_GENOA", "0"), # ? Hide-out all complex intrinsics + ("NK_TARGET_NEONFHM", "0"), # ? Hide-out all complex intrinsics + ("NK_TARGET_SVE2P1", "0"), # ? Hide-out all complex intrinsics + ("NK_TARGET_TURIN", "0"), # ? Hide-out all complex intrinsics + ("NK_TARGET_SIERRA", "0"), # ? Hide-out all complex intrinsics + ("NK_TARGET_ALDER", "0"), # ? Hide-out all complex intrinsics + ("NK_TARGET_SAPPHIREAMX", "0"), # ? Hide-out all complex intrinsics + ("NK_TARGET_GRANITEAMX", "0"), # ? Hide-out all complex intrinsics + ("NK_TARGET_SME", "0"), # ? Hide-out all complex intrinsics + ("NK_TARGET_SME2", "0"), # ? Hide-out all complex intrinsics + ("NK_TARGET_SME2P1", "0"), # ? Hide-out all complex intrinsics + ("NK_TARGET_SMEF64", "0"), # ? Hide-out all complex intrinsics + ("NK_TARGET_SMEFA64", "0"), # ? Hide-out all complex intrinsics + ("NK_TARGET_SMEHALF", "0"), # ? Hide-out all complex intrinsics + ("NK_TARGET_SMEBF16", "0"), # ? Hide-out all complex intrinsics + ("NK_TARGET_SMELUT2", "0"), # ? Hide-out all complex intrinsics + ("NK_TARGET_RVV", "0"), # ? Hide-out all complex intrinsics + ("NK_TARGET_RVVHALF", "0"), # ? Hide-out all complex intrinsics + ("NK_TARGET_RVVBF16", "0"), # ? Hide-out all complex intrinsics + ("NK_TARGET_RVVBB", "0"), # ? Hide-out all complex intrinsics + ("NK_TARGET_V128RELAXED", "0"), # ? Hide-out all complex intrinsics ] ) @@ -92,7 +108,7 @@ def get_bool_env_w_name(name: str, preference: bool) -> tuple: # Simplify debugging, but the normal `-g` may make builds much longer! compile_args.append("-g1") - # Linking to SimSIMD + # Linking to NumKong compile_args.append("-Wl,--unresolved-symbols=ignore-in-shared-libs") link_args.append("-static-libstdc++") @@ -102,7 +118,7 @@ def get_bool_env_w_name(name: str, preference: bool) -> tuple: if is_macos: # MacOS 10.15 or higher is needed for `aligned_alloc` support. 
- # https://github.com/unum-cloud/usearch/actions/runs/4975434891/jobs/8902603392 + # https://github.com/unum-cloud/USearch/actions/runs/4975434891/jobs/8902603392 compile_args.append("-mmacosx-version-min=10.15") compile_args.append("-std=c++17") compile_args.append("-O3") # Maximize performance @@ -113,6 +129,10 @@ def get_bool_env_w_name(name: str, preference: bool) -> tuple: # Simplify debugging, but the normal `-g` may make builds much longer! compile_args.append("-g1") + # NumKong symbols are resolved at runtime via ctypes.CDLL in __init__.py + link_args.append("-undefined") + link_args.append("dynamic_lookup") + # Linking OpenMP requires additional preparation in CIBuildWheel. # We must install `brew install llvm` ahead of time. # import subprocess as cli @@ -129,13 +149,15 @@ def get_bool_env_w_name(name: str, preference: bool) -> tuple: compile_args.append("/O2") compile_args.append("/fp:fast") # Enable fast math for MSVC compile_args.append("/W1") # Reduce warnings verbosity - link_args.append("/FORCE") # Force linkin with missing SimSIMD symbols + link_args.append("/FORCE") # Force linking with missing NumKong symbols + +sources = ["python/lib.cpp"] ext_modules = [ Pybind11Extension( "usearch.compiled", - ["python/lib.cpp"], + sources, extra_compile_args=compile_args, extra_link_args=link_args, define_macros=macros_args, @@ -160,11 +182,9 @@ def get_bool_env_w_name(name: str, preference: bool) -> tuple: "numpy", "tqdm", ] -if use_simsimd: - include_dirs.append("simsimd/include") - install_requires.append("simsimd>=6.0.5,<7.0.0") -if use_fp16lib: - include_dirs.append("fp16/include") +if use_numkong: + include_dirs.append("numkong/include") + install_requires.append("numkong") # With Clang, `setuptools` doesn't properly use the `language="c++"` argument we pass. 
@@ -181,19 +201,21 @@ def get_bool_env_w_name(name: str, preference: bool) -> tuple: version=__version__, packages=["usearch"], package_dir={"usearch": "python/usearch"}, + package_data={"usearch": ["compiled.pyi", "py.typed"]}, description="Smaller & Faster Single-File Vector Search Engine from Unum", author="Ash Vardanian", author_email="info@unum.cloud", - url="https://github.com/unum-cloud/usearch", + python_requires=">=3.10", + url="https://github.com/unum-cloud/USearch", long_description=long_description, long_description_content_type="text/markdown", license="Apache-2.0", + license_files=["LICENSE"], classifiers=[ "Development Status :: 5 - Production/Stable", "Natural Language :: English", "Intended Audience :: Developers", "Intended Audience :: Information Technology", - "License :: OSI Approved :: Apache Software License", "Programming Language :: C++", "Programming Language :: Python :: 3 :: Only", "Programming Language :: Python :: Implementation :: CPython", diff --git a/simsimd b/simsimd deleted file mode 160000 index b311d17ab..000000000 --- a/simsimd +++ /dev/null @@ -1 +0,0 @@ -Subproject commit b311d17ab9e13ebddb3a4ebda9d74a78109a276d diff --git a/sqlite/README.md b/sqlite/README.md index bc8a489b8..fcbb7cf50 100644 --- a/sqlite/README.md +++ b/sqlite/README.md @@ -9,7 +9,7 @@ This includes: - [x] Haversine distance for geographical coordinates. The SIMD-acceleration covers AVX2, most subsets of AVX512, ARM NEON, and Arm SVE instruction sets, more than most BLAS libraries. -The implementations are coming from [SimSIMD](https://github.com/ashvardanian/simsimd) and [StringZilla](https://github.com/ashvardanian/stringzilla). +The implementations are coming from [NumKong](https://github.com/ashvardanian/numkong) and [StringZilla](https://github.com/ashvardanian/stringzilla). They are most efficient when vectors are stored as BLOBs, but for broader compatibility can also handle JSONs, and even separate columns containing vector elements. 
## Installation diff --git a/stringzilla b/stringzilla index 2f4b1386c..30d3e2129 160000 --- a/stringzilla +++ b/stringzilla @@ -1 +1 @@ -Subproject commit 2f4b1386ca2ed3c4178c1e4f467a2e78a911f3b2 +Subproject commit 30d3e2129654d8269b3f66726414f9694c834e25 diff --git a/swift/README.md b/swift/README.md index a41d2a345..21a7c20c7 100644 --- a/swift/README.md +++ b/swift/README.md @@ -7,7 +7,7 @@ To install it, simply add the following line to your `Package.swift`: ```swift dependencies: [ - .package(url: "https://github.com/unum-cloud/usearch", .upToNextMajor(from: "2.0.0")) + .package(url: "https://github.com/unum-cloud/USearch", .upToNextMajor(from: "2.0.0")) ] ``` diff --git a/swift/Test.swift b/swift/Test.swift index 34987c226..bd3b77d5a 100644 --- a/swift/Test.swift +++ b/swift/Test.swift @@ -90,36 +90,6 @@ class Test: XCTestCase { XCTAssertEqual(try index.count(key: 49), 0) } - func testIssue399() throws { - let index = try USearchIndex.make( - metric: USearchMetric.l2sq, - dimensions: 1, - connectivity: 8, - quantization: USearchScalar.f32 - ) - try index.reserve(3) - - // add 3 entries then ensure all 3 are returned - try index.add(key: 1, vector: [1.1]) - try index.add(key: 2, vector: [2.1]) - try index.add(key: 3, vector: [3.1]) - try XCTAssertEqual(index.count, 3) - XCTAssertEqual(try index.search(vector: [1.0], count: 3).0, [1, 2, 3]) // works ๐Ÿ˜Ž - - // replace second-added entry then ensure all 3 are still returned - try _ = index.remove(key: 2) - try index.add(key: 2, vector: [2.2]) - try XCTAssertEqual(index.count, 3) - XCTAssertEqual(try index.search(vector: [1.0], count: 3).0, [1, 2, 3]) // works ๐Ÿ˜Ž - - // replace first-added entry then ensure all 3 are still returned - try _ = index.remove(key: 1) - try index.add(key: 1, vector: [1.2]) - let afterReplacingInitial = try index.search(vector: [1.0], count: 3).0 - try XCTAssertEqual(index.count, 3) - XCTAssertEqual(afterReplacingInitial, [1, 2, 3]) // v2.11.7 fails with "[1] != [1, 2, 3]" ๐Ÿ˜จ - } 
- func testFilteredSearchSingle() throws { let index = try USearchIndex.make( metric: USearchMetric.l2sq, diff --git a/swift/USearchIndex+Sugar.swift b/swift/USearchIndex+Sugar.swift index f995dc503..63ba0db16 100644 --- a/swift/USearchIndex+Sugar.swift +++ b/swift/USearchIndex+Sugar.swift @@ -222,6 +222,110 @@ extension USearchIndex { try filteredSearch(vector: vector[...], count: count, filter: filter) } + /// Adds a labeled vector to the index. + /// - Parameter key: Unique identifier for that object. + /// - Parameter vector: Uint8 vector. + /// - Throws: If runs out of memory. + public func add(key: Key, vector: ArraySlice) throws { + try vector.withContiguousStorageIfAvailable { + try addU8(key: key, vector: $0.baseAddress!) + } + } + + /// Adds a labeled vector to the index. + /// - Parameter key: Unique identifier for that object. + /// - Parameter vector: Uint8 vector. + /// - Throws: If runs out of memory. + public func add(key: Key, vector: [UInt8]) throws { + try add(key: key, vector: vector[...]) + } + + /// Approximate nearest neighbors search. + /// - Parameter vector: Uint8 query vector. + /// - Parameter count: Upper limit on the number of matches to retrieve. + /// - Returns: Labels and distances to closest approximate matches in decreasing similarity order. + /// - Throws: If runs out of memory. + public func search(vector: ArraySlice, count: Int) throws -> ([Key], [Float]) { + var matches: [Key] = Array(repeating: 0, count: count) + var distances: [Float] = Array(repeating: 0, count: count) + let results = try vector.withContiguousStorageIfAvailable { + try searchU8(vector: $0.baseAddress!, count: CUnsignedInt(count), keys: &matches, distances: &distances) + } + matches.removeLast(count - Int(results!)) + distances.removeLast(count - Int(results!)) + return (matches, distances) + } + + /// Approximate nearest neighbors search. + /// - Parameter vector: Uint8 query vector. + /// - Parameter count: Upper limit on the number of matches to retrieve. 
+ /// - Returns: Labels and distances to closest approximate matches in decreasing similarity order. + /// - Throws: If runs out of memory. + public func search(vector: [UInt8], count: Int) throws -> ([Key], [Float]) { + try search(vector: vector[...], count: count) + } + + /// Retrieve vectors for a given key. + /// - Parameter key: Unique identifier for that object. + /// - Parameter count: For multi-indexes, Number of vectors to retrieve. Defaults to 1. + /// - Returns: Two-dimensional array of Uint8 vectors. + /// - Throws: If runs out of memory. + public func get(key: USearchKey, count: Int = 1) throws -> [[UInt8]]? { + var vector: [UInt8] = try Array(repeating: 0, count: Int(self.dimensions) * count) + let returnedCount = try vector.withContiguousMutableStorageIfAvailable { buf in + guard let baseAddress = buf.baseAddress else { return UInt32(0) } + return try getU8( + key: key, + vector: baseAddress, + count: CUnsignedInt(count) + ) + } + guard let count = returnedCount, count > 0 else { return nil } + return try stride( + from: 0, + to: try Int(count) * Int(self.dimensions), + by: try Int(self.dimensions) + ).map { + try Array(vector[$0 ..< $0 + Int(self.dimensions)]) + } + } + + /// Approximate nearest neighbors search with filtering. + /// - Parameter vector: Uint8 query vector. + /// - Parameter count: Upper limit on the number of matches to retrieve. + /// - Parameter filter: Closure used to determine whether to skip a key in the results. + /// - Returns: Labels and distances to closest approximate matches in decreasing similarity order. + /// - Throws: If runs out of memory. 
+ public func filteredSearch(vector: ArraySlice, count: Int, filter: @escaping FilterFn) throws -> ( + [Key], [Float] + ) { + var matches: [Key] = Array(repeating: 0, count: count) + var distances: [Float] = Array(repeating: 0, count: count) + let results = try vector.withContiguousStorageIfAvailable { + try filteredSearchU8( + vector: $0.baseAddress!, + count: + CUnsignedInt(count), + filter: filter, + keys: &matches, + distances: &distances + ) + } + matches.removeLast(count - Int(results!)) + distances.removeLast(count - Int(results!)) + return (matches, distances) + } + + /// Approximate nearest neighbors search with filtering. + /// - Parameter vector: Uint8 query vector. + /// - Parameter count: Upper limit on the number of matches to retrieve. + /// - Parameter filter: Closure used to determine whether to skip a key in the results. + /// - Returns: Labels and distances to closest approximate matches in decreasing similarity order. + /// - Throws: If runs out of memory. + public func filteredSearch(vector: [UInt8], count: Int, filter: @escaping FilterFn) throws -> ([Key], [Float]) { + try filteredSearch(vector: vector[...], count: count, filter: filter) + } + #if arch(arm64) /// Adds a labeled vector to the index. 
diff --git a/swift/USearchIndex.swift b/swift/USearchIndex.swift index 271b98df2..2c070e3ac 100644 --- a/swift/USearchIndex.swift +++ b/swift/USearchIndex.swift @@ -9,10 +9,16 @@ import Foundation import USearchC public enum USearchScalar: UInt { + case f64 case f32 + case bf16 case f16 - case f64 + case e5m2 + case e4m3 + case e3m2 + case e2m3 case i8 + case u8 case b1 } @@ -62,16 +68,28 @@ extension USearchMetric { extension USearchScalar { func toNative() -> usearch_scalar_kind_t { switch self { - case .i8: - return usearch_scalar_i8_k + case .f64: + return usearch_scalar_f64_k + case .f32: + return usearch_scalar_f32_k + case .bf16: + return usearch_scalar_bf16_k case .f16: return usearch_scalar_f16_k + case .e5m2: + return usearch_scalar_e5m2_k + case .e4m3: + return usearch_scalar_e4m3_k + case .e3m2: + return usearch_scalar_e3m2_k + case .e2m3: + return usearch_scalar_e2m3_k + case .i8: + return usearch_scalar_i8_k + case .u8: + return usearch_scalar_u8_k case .b1: return usearch_scalar_b1_k - case .f32: - return usearch_scalar_f32_k - case .f64: - return usearch_scalar_f64_k } } } @@ -157,6 +175,21 @@ public enum USearchError: Error { } } +/** Returns the USearch library version string. */ +public func usearchVersion() -> String { + String(cString: usearch_version()) +} + +/** Returns a comma-separated list of ISAs compiled into this binary. */ +public func usearchHardwareAccelerationCompiled() -> String { + String(cString: usearch_hardware_acceleration_compiled()) +} + +/** Returns a comma-separated list of ISAs available at runtime. */ +public func usearchHardwareAccelerationAvailable() -> String { + String(cString: usearch_hardware_acceleration_available()) +} + @available(iOS 13.0, macOS 11.0, tvOS 13.0, watchOS 6.0, visionOS 1.0, *) public class USearchIndex: NSObject { private var nativeIndex: usearch_index_t @@ -422,6 +455,79 @@ public class USearchIndex: NSObject { ) } + /** + * @brief Adds a labeled vector to the index. 
+ * @param vector Uint8 vector. + */ + public func addU8(key: USearchKey, vector: UnsafePointer) throws { + try throwing { usearch_add(nativeIndex, key, vector, USearchScalar.u8.toNative(), $0) } + } + + /** + * @brief Approximate nearest neighbors search. + * @param vector Uint8 query vector. + * @param count Upper limit on the number of matches to retrieve. + * @param keys Optional output buffer for keys of approximate neighbors. + * @param distances Optional output buffer for (increasing) distances to approximate neighbors. + * @return Number of matches exported to `keys` and `distances`. + */ + public func searchU8( + vector: UnsafePointer, + count: UInt32, + keys: UnsafeMutablePointer?, + distances: UnsafeMutablePointer? + ) throws -> UInt32 { + let found = try throwing { + usearch_search(nativeIndex, vector, USearchScalar.u8.toNative(), Int(count), keys, distances, $0) + } + return UInt32(found) + } + + /** + * @brief Retrieves a labeled uint8 vector from the index. + * @param vector A buffer to store the vector. + * @param count For multi-indexes, the number of vectors to retrieve. + * @return Number of vectors exported to `vector`. + */ + public func getU8(key: USearchKey, vector: UnsafeMutablePointer, count: UInt32) throws -> UInt32 { + let result = try throwing { + usearch_get(nativeIndex, key, Int(count), vector, USearchScalar.u8.toNative(), $0) + } + return UInt32(result) + } + + /** + * @brief Approximate nearest neighbors search with filtering. + * @param vector Uint8 query vector. + * @param count Upper limit on the number of matches to retrieve. + * @param filter Closure called for each key, determining whether to include or skip key in the results. + * @param keys Optional output buffer for keys of approximate neighbors. + * @param distances Optional output buffer for (increasing) distances to approximate neighbors. + * @return Number of matches exported to `keys` and `distances`. 
+ */ + public func filteredSearchU8( + vector: UnsafePointer, + count: UInt32, + filter: @escaping USearchFilterFn, + keys: UnsafeMutablePointer?, + distances: UnsafeMutablePointer? + ) throws -> UInt32 { + return try filteredSearchGeneric( + nativeIndex, + vector: vector, + count: count, + quantization: .u8, + filter: filter, + keys: keys, + distances: distances + ) + } + + // Float16 is only available on arm64 Apple platforms. The @available annotation + // alone is insufficient because Float16 is a type-level absence on x86_64, + // not a runtime availability issue. See: https://github.com/unum-cloud/usearch/issues/589 + #if arch(arm64) + /** * @brief Adds a labeled vector to the index. * @param vector Half-precision vector. @@ -495,6 +601,8 @@ public class USearchIndex: NSObject { ) } + #endif // arch(arm64) + public func contains(key: USearchKey) throws -> Bool { return try throwing { usearch_contains(nativeIndex, key, $0) } } diff --git a/wasm/README.md b/wasm/README.md index 5dce0ccee..dd378a89b 100644 --- a/wasm/README.md +++ b/wasm/README.md @@ -3,7 +3,7 @@ ## Installation ```txt -https://github.com/unum-cloud/usearch +https://github.com/unum-cloud/USearch ``` ## Quickstart diff --git a/wasmer.toml b/wasmer.toml index 67a96aa24..25a87521e 100644 --- a/wasmer.toml +++ b/wasmer.toml @@ -1,11 +1,11 @@ [package] -name = "unum/usearch" -version = "2.24.0" description = "Smaller & Faster Single-File Vector Search Engine from Unum" +homepage = "https://unum-cloud.github.io/USearch/javascript" license = "Apache-2.0" +name = "unum/usearch" readme = "README.md" -repository = "https://github.com/unum-cloud/usearch" -homepage = "https://unum-cloud.github.io/usearch/javascript" +repository = "https://github.com/unum-cloud/USearch" +version = "2.24.0" [[module]] name = "index"