diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index cadac5c9f..eca8b266a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -11,369 +11,19 @@ on: - master jobs: - build-and-test-cpp-x86_64: - runs-on: ${{ matrix.os }} - env: - CT2_VERBOSE: 1 - strategy: - matrix: - os: [ubuntu-22.04] - backend: [mkl, dnnl] - - steps: - - uses: actions/checkout@v4 - with: - submodules: recursive - - - name: Install Intel oneAPI - if: startsWith(matrix.os, 'ubuntu') - run: | - wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB - sudo apt-key add *.PUB - sudo sh -c 'echo "deb https://apt.repos.intel.com/oneapi all main" > /etc/apt/sources.list.d/oneAPI.list' - sudo apt-get update - - - name: Configure with MKL - if: startsWith(matrix.os, 'ubuntu') && matrix.backend == 'mkl' - env: - CT2_USE_MKL: 1 - MKL_VERSION: 2023.0.0 - run: | - sudo apt-get install -y intel-oneapi-mkl-devel-$MKL_VERSION - cmake -DCMAKE_INSTALL_PREFIX=$PWD/install -DBUILD_TESTS=ON . - - - name: Configure with DNNL - if: startsWith(matrix.os, 'ubuntu') && matrix.backend == 'dnnl' - env: - DNNL_VERSION: 2023.0.0-25399 - run: | - sudo apt-get install -y intel-oneapi-dnnl-devel=$DNNL_VERSION intel-oneapi-dnnl=$DNNL_VERSION - cmake -DCMAKE_INSTALL_PREFIX=$PWD/install -DBUILD_TESTS=ON -DWITH_MKL=OFF -DOPENMP_RUNTIME=COMP -DWITH_DNNL=ON . 
- - - name: Build - run: | - make -j $(nproc) install - - - name: Download test data - working-directory: tests/data/models - run: | - wget https://opennmt-models.s3.amazonaws.com/pi_lm_step_5000.pt - wget https://opennmt-models.s3.amazonaws.com/transliteration-aren-all.tar.gz - tar xf transliteration-aren-all.tar.gz - - - name: Test MKL - if: matrix.backend == 'mkl' - env: - CT2_USE_MKL: 1 - run: | - tests/ctranslate2_test tests/data - - name: Test DNNL - if: matrix.backend == 'dnnl' - run: | - tests/ctranslate2_test tests/data - - build-and-test-cpp-x86_64-address_sanitizer: - runs-on: ubuntu-22.04 - env: - CT2_VERBOSE: 1 - - steps: - - uses: actions/checkout@v4 - with: - submodules: recursive - - - name: Install Intel oneAPI - run: | - wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB - sudo apt-key add *.PUB - sudo sh -c 'echo "deb https://apt.repos.intel.com/oneapi all main" > /etc/apt/sources.list.d/oneAPI.list' - sudo apt update - - - name: Install MKL - env: - CT2_USE_MKL: 1 - MKL_VERSION: 2023.0.0 - run: | - sudo apt install -y intel-oneapi-mkl-devel-$MKL_VERSION - - - name: Install Clang - run: | - sudo apt install -y clang - - - name: Configure with MKL and Clang - env: - CT2_USE_MKL: 1 - run: | - cmake -DCMAKE_INSTALL_PREFIX=$PWD/install -DBUILD_TESTS=ON -DCMAKE_C_COMPILER=/usr/bin/clang -DCMAKE_CXX_COMPILER=/usr/bin/clang++ -DENABLE_ADDRESS_SANITIZER=ON -DCMAKE_BUILD_TYPE=Debug . 
- - - name: Build - run: | - make -j $(nproc) install - - - name: Download test data - working-directory: tests/data/models - run: | - wget https://opennmt-models.s3.amazonaws.com/pi_lm_step_5000.pt - wget https://opennmt-models.s3.amazonaws.com/transliteration-aren-all.tar.gz - tar xf transliteration-aren-all.tar.gz - - - name: Test AddressSanitizer - env: - CT2_USE_MKL: 1 - run: | - ASAN_OPTIONS=detect_leaks=1:print_stats=1 tests/ctranslate2_test tests/data - - build-and-test-cpp-arm64: - runs-on: ${{ matrix.os }} - env: - CT2_VERBOSE: 1 - strategy: - matrix: - - include: - - os: ubuntu-22.04-arm - backend: openblas - - os: macos-15 - backend: ruy - steps: - - uses: actions/checkout@v4 - with: - submodules: recursive - - - name: Build with OpenBLAS and Ruy - if: matrix.backend == 'openblas' - run: | - wget https://github.com/xianyi/OpenBLAS/archive/v0.3.13.tar.gz - tar xzvf v0.3.13.tar.gz - cd OpenBLAS-0.3.13 - make TARGET=ARMV8 NO_LAPACK=1 -j $(nproc) - sudo make PREFIX=/usr/local install -j $(nproc) - cd .. - export OpenBLAS_HOME=/usr/local - cmake \ - -DOPENMP_RUNTIME=COMP \ - -DCMAKE_INSTALL_PREFIX=$PWD/install \ - -DWITH_MKL=OFF \ - -DWITH_OPENBLAS=ON \ - -DWITH_RUY=ON \ - -DBUILD_TESTS=ON \ - . - make -j $(nproc) install - - - name: Build Ruy - if: matrix.backend == 'ruy' - run: | - CMAKE_EXTRA_OPTIONS='-DCMAKE_OSX_ARCHITECTURES=arm64 -DWITH_ACCELERATE=ON -DWITH_MKL=OFF -DOPENMP_RUNTIME=NONE -DWITH_RUY=ON' - cmake -DCMAKE_POLICY_VERSION_MINIMUM=3.5 \ - -DCMAKE_INSTALL_RPATH_USE_LINK_PATH=ON \ - -DCMAKE_INSTALL_PREFIX=$PWD/install \ - -DBUILD_TESTS=ON \ - $CMAKE_EXTRA_OPTIONS \ - . 
- make -j $(nproc) install - - - name: Download test data - run: | - wget https://opennmt-models.s3.amazonaws.com/transliteration-aren-all.tar.gz - tar xf transliteration-aren-all.tar.gz -C tests/data/models/ - - - name: Test - run: | - tests/ctranslate2_test tests/data - - build-python-wheels: - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-22.04, windows-2022] - arch: [auto64] - include: - - os: ubuntu-22.04 - arch: aarch64 - - os: macos-15 - arch: arm64 - - os: macos-15-intel - arch: x86_64 - - steps: - - uses: actions/checkout@v4 - with: - submodules: recursive - - - uses: docker/setup-qemu-action@v2 - if: ${{ matrix.arch == 'aarch64' }} - name: Set up QEMU - - - name: Build wheels - uses: pypa/cibuildwheel@v3.2.1 - with: - package-dir: python - output-dir: python/wheelhouse - env: - CIBW_ENVIRONMENT_PASS_LINUX: CIBW_ARCHS - CIBW_ENVIRONMENT_WINDOWS: CTRANSLATE2_ROOT='${{ github.workspace }}\install' - CIBW_ENVIRONMENT_MACOS: "CTRANSLATE2_ROOT='/usr/local' MACOSX_DEPLOYMENT_TARGET=11.00" - CIBW_BEFORE_ALL_LINUX: python/tools/prepare_build_environment_linux.sh - CIBW_BEFORE_ALL_MACOS: python/tools/prepare_build_environment_macos.sh - CIBW_BEFORE_ALL_WINDOWS: bash python/tools/prepare_build_environment_windows.sh - CIBW_BEFORE_BUILD: pip install -r python/install_requirements.txt - CIBW_MANYLINUX_X86_64_IMAGE: manylinux_2_28 - CIBW_MANYLINUX_AARCH64_IMAGE: manylinux_2_28 - CIBW_ARCHS: ${{ matrix.arch }} - CIBW_SKIP: "*-musllinux_*" - - - name: Upload Python wheels - uses: actions/upload-artifact@v4 - with: - name: python-wheels-${{ runner.os }}-${{ matrix.arch }} - path: python/wheelhouse - - build-python-wheels-rocm: - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-24.04, windows-2025] - - steps: - - uses: actions/checkout@v6 - with: - submodules: recursive - - - name: Build wheels - uses: pypa/cibuildwheel@v3.2.1 - with: - package-dir: python - output-dir: python/wheelhouse - env: - CIBW_ENVIRONMENT_LINUX: ROCM_PATH=/opt/rocm 
LD_LIBRARY_PATH=/opt/rocm/lib/llvm/lib:$LD_LIBRARY_PATH - CIBW_ENVIRONMENT_WINDOWS: CTRANSLATE2_ROOT='${{ github.workspace }}\install' - CIBW_BEFORE_ALL_LINUX: python/tools/prepare_build_environment_linux_rocm.sh - CIBW_BEFORE_ALL_WINDOWS: bash python/tools/prepare_build_environment_windows_rocm.sh - CIBW_BEFORE_BUILD: pip install -r python/install_requirements.txt - CIBW_MANYLINUX_X86_64_IMAGE: manylinux_2_28 - CIBW_ARCHS: auto64 - CIBW_SKIP: "*-musllinux_*" - CIBW_REPAIR_WHEEL_COMMAND_LINUX: 'auditwheel repair -w {dest_dir} --exclude "/opt/rocm/lib/lib*" {wheel}' - - - name: Upload Python wheels - uses: actions/upload-artifact@v6 - with: - name: rocm-python-wheels-${{ runner.os }} - path: python/wheelhouse - - # We could test the Python wheels using cibuildwheel but we prefer to run the tests outside - # the build environment to ensure wheels correctly embed all dependencies. - test-python-wheels: - needs: [build-python-wheels] + build-and-push-docker-images: runs-on: ${{ matrix.os }} strategy: matrix: include: - os: ubuntu-22.04 - artifact_pattern: python-wheels-Linux-auto64 - wheel_pattern: "*cp310*manylinux*x86_64.whl" - + gpu: cuda + - os: ubuntu-22.04 + gpu: rocm - os: ubuntu-22.04-arm - artifact_pattern: python-wheels-Linux-aarch64 - wheel_pattern: "*cp310*manylinux*_aarch64.whl" - - - os: windows-2022 - artifact_pattern: python-wheels-Windows-auto64 - wheel_pattern: "*cp310*win*amd64.whl" - - - os: macos-15 - artifact_pattern: python-wheels-macOS-arm64 - wheel_pattern: "*cp310*macosx*arm64.whl" - - steps: - - name: Set up Python 3.10.11 - uses: actions/setup-python@v5 - with: - python-version: "3.10.11" - - - uses: actions/checkout@v4 - - - name: Prepare test environment - shell: bash - run: | - ./python/tools/prepare_test_environment.sh - - - name: Download Python wheels - uses: actions/download-artifact@v4 - with: - pattern: ${{ matrix.artifact_pattern }} - merge-multiple: true - path: . 
- - - name: Install wheel - shell: bash - run: | - pip install ${{ matrix.wheel_pattern }} - - - name: Test Python wheel - run: | - pytest -v python/tests/ --ignore=python/tests/test_opennmt_tf.py - - - check-python-style: - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v4 - - - name: Set up Python 3.10.11 - uses: actions/setup-python@v5 - with: - python-version: "3.10.11" - - - name: Install dependencies - run: | - python -m pip install black==22.* flake8==3.8.* isort==5.* - - - name: Check code format with Black - working-directory: python - run: | - black --check . - - - name: Check imports order with isort - working-directory: python - run: | - isort --check-only . - - - name: Check code style with Flake8 - working-directory: python - if: ${{ always() }} - run: | - flake8 . - - - publish-python-wheels-on-pypi: - if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') - needs: [build-and-test-cpp-x86_64, build-and-test-cpp-arm64, build-python-wheels, test-python-wheels, check-python-style] - runs-on: ubuntu-22.04 - - steps: - - name: Download Python wheels - uses: actions/download-artifact@v4 - with: - pattern: python-wheels-* - merge-multiple: true - path: . - - - name: Publish Python wheels to PyPI - uses: pypa/gh-action-pypi-publish@release/v1 - with: - user: __token__ - password: ${{ secrets.PYPI_API_TOKEN }} - packages-dir: . 
- - - build-and-push-docker-images: - runs-on: ubuntu-22.04 - strategy: - matrix: - gpu: [cuda, rocm] + gpu: cudaarm + suffix: -arm steps: - uses: actions/checkout@v4 @@ -405,7 +55,7 @@ jobs: - name: Build Docker images run: | - ./docker/build_all.sh latest 0 ${{ matrix.gpu }} + ./docker/build_all.sh latest 0 ${{ matrix.gpu }} ${{ matrix.suffix }} - name: Login to DockerHub if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') @@ -418,66 +68,4 @@ jobs: - name: Push Docker images if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') run: | - ./docker/build_all.sh ${GITHUB_REF##*/v} 1 ${{ matrix.gpu }} - - - build-and-deploy-docs: - runs-on: ubuntu-latest - needs: [check-python-style, build-python-wheels, build-python-wheels-rocm] - - permissions: - contents: write - steps: - - uses: actions/checkout@v4 - - name: Set up Python 3.10.11 - uses: actions/setup-python@v5 - with: - python-version: "3.10.11" - - name: Download CTranslate2 wheels - uses: actions/download-artifact@v4 - with: - pattern: python-wheels-${{ runner.os }}-* - merge-multiple: true - path: . - - name: Install CTranslate2 wheel - run: | - pip install *cp310*manylinux*x86_64.whl - - name: Install dependencies to build docs - working-directory: docs - run: | - python -m pip install -r requirements.txt - - name: Build docs - working-directory: docs - run: | - python generate.py python - sphinx-build . 
build - - name: Deploy docs - if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') - uses: JamesIves/github-pages-deploy-action@v4 - with: - folder: docs/build - clean: true - - name: Download ROCm wheels and zip for release - if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - mkdir -p release-zips - - gh run download ${{ github.run_id }} \ - --name rocm-python-wheels-Linux \ - --dir temp-linux - zip -r release-zips/rocm-python-wheels-Linux.zip temp-linux/* - rm -rf temp-linux - - gh run download ${{ github.run_id }} \ - --name rocm-python-wheels-Windows \ - --dir temp-windows - zip -r release-zips/rocm-python-wheels-Windows.zip temp-windows/* - rm -rf temp-windows - - - name: Create GitHub Release - if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') - uses: softprops/action-gh-release@v2 - with: - files: release-zips/*.zip + ./docker/build_all.sh ${GITHUB_REF##*/v} 1 ${{ matrix.gpu }} ${{ matrix.suffix }} diff --git a/docker/Dockerfile_cuda_arm b/docker/Dockerfile_cuda_arm new file mode 100644 index 000000000..9b242c20d --- /dev/null +++ b/docker/Dockerfile_cuda_arm @@ -0,0 +1,92 @@ +FROM nvidia/cuda:12.8.1-cudnn-devel-ubuntu22.04 AS builder + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + python3-dev \ + python3-pip \ + wget \ + && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +WORKDIR /root + +RUN python3 -m pip --no-cache-dir install cmake==3.22.* + +ENV ONEDNN_VERSION=3.1.1 +RUN wget -q https://github.com/uxlfoundation/oneDNN/archive/refs/tags/v${ONEDNN_VERSION}.tar.gz && \ + tar xf *.tar.gz && \ + rm *.tar.gz && \ + cd oneDNN-* && \ + cmake -DCMAKE_BUILD_TYPE=Release -DONEDNN_LIBRARY_TYPE=STATIC -DONEDNN_BUILD_EXAMPLES=OFF -DONEDNN_BUILD_TESTS=OFF -DONEDNN_ENABLE_WORKLOAD=INFERENCE -DONEDNN_ENABLE_PRIMITIVE="CONVOLUTION;REORDER" -DONEDNN_BUILD_GRAPH=OFF . && \ + make -j$(nproc) install && \ + cd .. 
&& \ + rm -r oneDNN-* + +ENV OPENMPI_VERSION=4.1.6 +RUN wget -q https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-${OPENMPI_VERSION}.tar.bz2 && \ + tar xf *.tar.bz2 && \ + rm *.tar.bz2 && \ + cd openmpi-* && \ + ./configure && \ + make -j$(nproc) install && \ + cd .. && \ + rm -r openmpi-* + +COPY third_party third_party +COPY cli cli +COPY include include +COPY src src +COPY cmake cmake +COPY python python +COPY CMakeLists.txt . + +ARG CXX_FLAGS +ARG CUDA_NVCC_FLAGS +ENV CUDA_NVCC_FLAGS=${CUDA_NVCC_FLAGS:-"-Xfatbin=-compress-all"} +ARG CUDA_ARCH_LIST +ENV CUDA_ARCH_LIST=${CUDA_ARCH_LIST:-"Common"} +ENV CTRANSLATE2_ROOT=/opt/ctranslate2 +ENV LD_LIBRARY_PATH=/usr/local/lib/:${LD_LIBRARY_PATH} + +RUN mkdir build_tmp && \ + cd build_tmp && \ + cmake -DCMAKE_INSTALL_PREFIX=${CTRANSLATE2_ROOT} \ + -DWITH_CUDA=ON -DWITH_CUDNN=ON -DWITH_MKL=OFF -DWITH_DNNL=ON -DOPENMP_RUNTIME=COMP \ + -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS="${CXX_FLAGS}" \ + -DCUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS}" -DCUDA_ARCH_LIST="${CUDA_ARCH_LIST}" -DWITH_TENSOR_PARALLEL=ON .. && \ + VERBOSE=1 make -j$(nproc) install + +ENV LANG=en_US.UTF-8 +COPY README.md . + +RUN cd python && \ + python3 -m pip --no-cache-dir install -r install_requirements.txt && \ + python3 setup.py bdist_wheel --dist-dir $CTRANSLATE2_ROOT + +FROM nvidia/cuda:12.8.1-base-ubuntu22.04 + +# We remove the cuda-compat package because it conflicts with the CUDA Enhanced Compatibility. +# See e.g. 
https://github.com/NVIDIA/nvidia-docker/issues/1515 +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + libcublas-12-8 \ + libcudnn9-cuda-12 \ + libnccl2 \ + libopenmpi3 \ + openmpi-bin \ + libgomp1 \ + python3-pip \ + && \ + apt-get purge -y cuda-compat-12-8 || true && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +ENV CTRANSLATE2_ROOT=/opt/ctranslate2 +ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CTRANSLATE2_ROOT/lib + +COPY --from=builder $CTRANSLATE2_ROOT $CTRANSLATE2_ROOT +RUN python3 -m pip --no-cache-dir install $CTRANSLATE2_ROOT/*.whl && \ + rm $CTRANSLATE2_ROOT/*.whl + +ENTRYPOINT ["/opt/ctranslate2/bin/ct2-translator"] diff --git a/docker/build_all.sh b/docker/build_all.sh index f05ea99e8..03cf31e67 100755 --- a/docker/build_all.sh +++ b/docker/build_all.sh @@ -21,7 +21,8 @@ cd $ROOT_DIR VERSION=${1:-latest} PUSH=${2:-0} GPU=${3:-cuda} -IMAGE=ghcr.io/opennmt/ctranslate2 +SUFFIX=${4} +IMAGE=ghcr.io/opennmt/ctranslate2 build() { @@ -29,8 +30,8 @@ build() IMAGE_NAME=$2 BUILD_ARGS=${3:-} - LATEST=$IMAGE:latest-$IMAGE_NAME - TAGGED=$IMAGE:$VERSION-$IMAGE_NAME + LATEST=$IMAGE:latest-$IMAGE_NAME$SUFFIX + TAGGED=$IMAGE:$VERSION-$IMAGE_NAME$SUFFIX docker build --pull $BUILD_ARGS -t $LATEST -f docker/$DOCKERFILE . if [ $PUSH -eq 1 ]; then docker push $LATEST @@ -45,6 +46,8 @@ build() if [ "$GPU" == "rocm" ]; then build Dockerfile_rocm ubuntu22.04-rocm7.2 +elif [ "$GPU" == "cudaarm" ]; then + build Dockerfile_cuda_arm ubuntu22.04-cuda12.8 else build Dockerfile ubuntu22.04-cuda12.8 fi