diff --git a/.azure-pipelines/codecov.yml b/.azure-pipelines/codecov.yml new file mode 100644 index 000000000..c4abeaa78 --- /dev/null +++ b/.azure-pipelines/codecov.yml @@ -0,0 +1,93 @@ +trigger: + branches: + include: + - main + - release/* + paths: + exclude: + - .devcontainer/** + - .github/** + - apps/** + - docker/** + - docs/** + - '**/*.md' + +pr: + branches: + include: + - main + - release/* + drafts: false + paths: + exclude: + - .devcontainer/** + - .github/** + - apps/** + - docker/** + - docs/** + - '**/*.md' + +jobs: +- job: CodeCoverageA100 + timeoutInMinutes: 40 + pool: + name: msccl-ci + variables: + - group: mscclpp + strategy: + matrix: + cuda12: + containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-dev-cuda12.9 + + container: + image: $(containerImage) + + steps: + - template: templates/codecov.yml + parameters: + subscription: mscclpp-ci + vmssName: mscclpp-ci + gpuArch: '80' + +- job: CodeCoverageH100 + timeoutInMinutes: 40 + pool: + name: msccl-ci-h100 + variables: + - group: mscclpp + strategy: + matrix: + cuda12: + containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-dev-cuda12.9 + + container: + image: $(containerImage) + + steps: + - template: templates/codecov.yml + parameters: + subscription: mscclpp-ci-h100 + vmssName: mscclpp-h100-ci + gpuArch: '90' + +- job: CodeCoverageMI300X + timeoutInMinutes: 40 + pool: + name: msccl-ci-mi300x + variables: + - group: mscclpp + strategy: + matrix: + rocm6_2: + containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-dev-rocm6.2 + + container: + image: $(containerImage) + + steps: + - template: templates/codecov.yml + parameters: + subscription: mscclpp-ci-mi300x + vmssName: mscclpp-mi300x-ci + platform: rocm + gpuArch: gfx942 diff --git a/.azure-pipelines/integration-test.yml b/.azure-pipelines/integration-test.yml index f6fe3a47f..d5d5f9bde 100644 --- a/.azure-pipelines/integration-test.yml +++ b/.azure-pipelines/integration-test.yml @@ -41,11 +41,10 @@ jobs: image: $(containerImage) steps: - - template: templates/integration-test.yaml + - template: templates/integration-test.yml parameters: subscription: mscclpp-ci vmssName: mscclpp-ci - sshKeySecureFile: mscclpp.pem gpuArch: '80' - job: IntegrationTestH100 @@ -61,10 +60,9 @@ jobs: image: $(containerImage) steps: - - template: templates/integration-test.yaml + - template: templates/integration-test.yml parameters: subscription: mscclpp-ci-h100 vmssName: mscclpp-h100-ci - sshKeySecureFile: mscclpp.pem perfBaselineFile: test/deploy/perf_ndmv5.jsonl gpuArch: '90' diff --git a/.azure-pipelines/multi-nodes-test.yml b/.azure-pipelines/multi-nodes-test.yml index 914c2317c..d49248791 100644 --- a/.azure-pipelines/multi-nodes-test.yml +++ b/.azure-pipelines/multi-nodes-test.yml @@ -37,33 +37,6 @@ jobs: image: $[ variables['containerImage'] ] steps: - - task: Bash@3 - name: Build - displayName: Build - inputs: - targetType: 'inline' - script: | - mkdir build && cd build - cmake -DCMAKE_BUILD_TYPE=Release -DMSCCLPP_BYPASS_GPU_CHECK=ON -DMSCCLPP_USE_CUDA=ON -DMSCCLPP_BUILD_TESTS=ON .. - make -j - workingDirectory: '$(System.DefaultWorkingDirectory)' - - - task: DownloadSecureFile@1 - name: SshKeyFile - displayName: Download key file - inputs: - secureFile: mscclpp-ssh.key - - - task: Bash@3 - name: InstallPackages - displayName: Install Packages - inputs: - targetType: 'inline' - script: | - sudo apt-get update -y - sudo apt-get install pssh -y - curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash - - task: Bash@3 displayName: Add HostEntry inputs: @@ -77,107 +50,46 @@ jobs: echo "Entry already exists, nothing to do." fi - - task: AzureCLI@2 - name: StartVMSS - displayName: Start VMSS - inputs: - azureSubscription: msccl-it - scriptType: bash - scriptLocation: inlineScript - inlineScript: | - az vmss start --name mscclit-vmss --resource-group msccl-IT + - template: templates/deploy.yml + parameters: + subscription: msccl-it + vmssName: mscclit-vmss + resourceGroup: msccl-IT - - task: Bash@3 - name: DeployTestEnv - displayName: Deploy Test Env - inputs: - targetType: filePath - filePath: test/deploy/deploy.sh - workingDirectory: '$(System.DefaultWorkingDirectory)' + - template: templates/run-remote-task.yml + parameters: + name: RunMscclppTest + displayName: Run multi-nodes mscclpp-test + runRemoteArgs: '--hostfile $(System.DefaultWorkingDirectory)/test/deploy/hostfile --host mscclit-000000 --user azureuser' + remoteScript: | + bash /root/mscclpp/test/deploy/run_tests.sh mscclpp-test - - task: Bash@3 - name: RunMscclppTest - displayName: Run multi-nodes mscclpp-test - inputs: - targetType: 'inline' - script: | - set -e - HOSTFILE=$(System.DefaultWorkingDirectory)/test/mscclpp-test/deploy/hostfile - SSH_OPTION="StrictHostKeyChecking=no" - KeyFilePath=${SSHKEYFILE_SECUREFILEPATH} - rm -rf output/* - mkdir -p output - touch output/mscclit-000000 - tail -f output/mscclit-000000 & - CHILD_PID=$! - parallel-ssh -t 0 -H mscclit-000000 -l azureuser -x "-i ${KeyFilePath}" \ - -O $SSH_OPTION -o output 'sudo docker exec -t mscclpp-test bash /root/mscclpp/test/deploy/run_tests.sh mscclpp-test' - kill $CHILD_PID - - - task: Bash@3 - name: RunMultiNodeUnitTest - displayName: Run multi-nodes unit tests - inputs: - targetType: 'inline' - script: | - set -e - HOSTFILE=$(System.DefaultWorkingDirectory)/test/mscclpp-test/deploy/hostfile - SSH_OPTION="StrictHostKeyChecking=no" - KeyFilePath=${SSHKEYFILE_SECUREFILEPATH} - rm -rf output/* - mkdir -p output - touch output/mscclit-000000 - tail -f output/mscclit-000000 & - CHILD_PID=$! - parallel-ssh -t 0 -H mscclit-000000 -l azureuser -x "-i ${KeyFilePath}" \ - -O $SSH_OPTION -o output 'sudo docker exec -t mscclpp-test bash /root/mscclpp/test/deploy/run_tests.sh mp-ut' - kill $CHILD_PID + - template: templates/run-remote-task.yml + parameters: + name: RunMultiNodeUnitTest + displayName: Run multi-nodes unit tests + runRemoteArgs: '--hostfile $(System.DefaultWorkingDirectory)/test/deploy/hostfile --host mscclit-000000 --user azureuser' + remoteScript: | + bash /root/mscclpp/test/deploy/run_tests.sh mp-ut - - task: Bash@3 - name: RunMultiNodePythonTests - displayName: Run multi-nodes python tests - inputs: - targetType: 'inline' - script: | - set -e - HOSTFILE=$(System.DefaultWorkingDirectory)/test/mscclpp-test/deploy/hostfile - SSH_OPTION="StrictHostKeyChecking=no" - KeyFilePath=${SSHKEYFILE_SECUREFILEPATH} - rm -rf output/* - mkdir -p output - touch output/mscclit-000000 - tail -f output/mscclit-000000 & - CHILD_PID=$! - parallel-ssh -t 0 -H mscclit-000000 -l azureuser -x "-i ${KeyFilePath}" \ - -O $SSH_OPTION -o output 'sudo docker exec -t mscclpp-test bash /root/mscclpp/test/deploy/run_tests.sh pytests' - kill $CHILD_PID + - template: templates/run-remote-task.yml + parameters: + name: RunMultiNodePythonTests + displayName: Run multi-nodes python tests + runRemoteArgs: '--hostfile $(System.DefaultWorkingDirectory)/test/deploy/hostfile --host mscclit-000000 --user azureuser' + remoteScript: | + bash /root/mscclpp/test/deploy/run_tests.sh pytests - - task: Bash@3 - name: RunMultiNodePythonBenchmark - displayName: Run multi-nodes python benchmark - inputs: - targetType: 'inline' - script: | - set -e - HOSTFILE=$(System.DefaultWorkingDirectory)/test/mscclpp-test/deploy/hostfile - SSH_OPTION="StrictHostKeyChecking=no" - KeyFilePath=${SSHKEYFILE_SECUREFILEPATH} - rm -rf output/* - mkdir -p output - touch output/mscclit-000000 - tail -f output/mscclit-000000 & - CHILD_PID=$! - parallel-ssh -t 0 -H mscclit-000000 -l azureuser -x "-i ${KeyFilePath}" \ - -O $SSH_OPTION -o output 'sudo docker exec -t mscclpp-test bash /root/mscclpp/test/deploy/run_tests.sh py-benchmark' - kill $CHILD_PID + - template: templates/run-remote-task.yml + parameters: + name: RunMultiNodePythonBenchmark + displayName: Run multi-nodes python benchmark + runRemoteArgs: '--hostfile $(System.DefaultWorkingDirectory)/test/deploy/hostfile --host mscclit-000000 --user azureuser' + remoteScript: | + bash /root/mscclpp/test/deploy/run_tests.sh py-benchmark - - task: AzureCLI@2 - name: StopVMSS - displayName: Deallocate VMSS - condition: always() - inputs: - azureSubscription: msccl-it - scriptType: bash - scriptLocation: inlineScript - inlineScript: | - az vmss deallocate --name mscclit-vmss --resource-group msccl-IT + - template: templates/stop.yml + parameters: + subscription: msccl-it + vmssName: mscclit-vmss + resourceGroup: msccl-IT diff --git a/.azure-pipelines/nccl-api-test.yaml b/.azure-pipelines/nccl-api-test.yml similarity index 88% rename from .azure-pipelines/nccl-api-test.yaml rename to .azure-pipelines/nccl-api-test.yml index 4951c5bdd..cc0174120 100644 --- a/.azure-pipelines/nccl-api-test.yaml +++ b/.azure-pipelines/nccl-api-test.yml @@ -40,11 +40,10 @@ jobs: image: $(containerImage) steps: - - template: templates/nccl-test.yaml + - template: templates/nccl-test.yml parameters: subscription: mscclpp-ci vmssName: mscclpp-ci - sshKeySecureFile: mscclpp.pem nvccGencode: "-gencode=arch=compute_80,code=sm_80" - job: NcclTestH100 @@ -61,9 +60,8 @@ jobs: image: $(containerImage) steps: - - template: templates/nccl-test.yaml + - template: templates/nccl-test.yml parameters: subscription: mscclpp-ci-h100 vmssName: mscclpp-h100-ci - sshKeySecureFile: mscclpp.pem nvccGencode: "-gencode=arch=compute_90,code=sm_90" \ No newline at end of file diff --git a/.azure-pipelines/rccl-api-test.yml b/.azure-pipelines/rccl-api-test.yml index 92c5874f6..43841079e 100644 --- a/.azure-pipelines/rccl-api-test.yml +++ b/.azure-pipelines/rccl-api-test.yml @@ -40,9 +40,8 @@ jobs: image: $(containerImage) steps: - - template: templates/rccl-test.yaml + - template: templates/rccl-test.yml parameters: subscription: mscclpp-ci-mi300x vmssName: mscclpp-mi300x-ci - sshKeySecureFile: mscclpp.pem gpuArch: gfx942 diff --git a/.azure-pipelines/templates/codecov.yml b/.azure-pipelines/templates/codecov.yml new file mode 100644 index 000000000..08797351a --- /dev/null +++ b/.azure-pipelines/templates/codecov.yml @@ -0,0 +1,110 @@ +parameters: +- name: subscription + type: string +- name: vmssName + type: string +- name: platform + type: string + default: 'cuda' +- name: gpuArch + type: string + +steps: +- template: deploy.yml + parameters: + subscription: ${{ parameters.subscription }} + vmssName: ${{ parameters.vmssName }} + platform: ${{ parameters.platform }} + gpuArch: ${{ parameters.gpuArch }} + buildType: Debug + cmakeArgs: '-DMSCCLPP_ENABLE_COVERAGE=ON' + buildDisplayName: 'Build with coverage' + buildName: BuildCoverage + deployArgs: 'single-node-test true ${{ parameters.platform }}' + +- template: run-remote-task.yml + parameters: + name: TestsCoverageNonPerf + displayName: Run unit_tests + mp_unit_tests (non-perf) with coverage + remoteScript: | + BUILD_PREFIX=$(cat build/BUILD_PREFIX) + STRIP_COUNT=$(echo $BUILD_PREFIX | tr -cd / | wc -c) + export GCOV_PREFIX=/root/mscclpp + export GCOV_PREFIX_STRIP=$STRIP_COUNT + + echo "Running unit_tests..." + ./build/bin/unit_tests + echo "unit_tests: PASSED" + + echo "Running mp_unit_tests -np 2..." + mpirun --allow-run-as-root -tag-output -np 2 ./build/bin/mp_unit_tests --exclude-perf-tests + echo "mp_unit_tests -np 2: PASSED" + + echo "Running mp_unit_tests -np 4..." + mpirun --allow-run-as-root -tag-output -np 4 ./build/bin/mp_unit_tests --exclude-perf-tests + echo "mp_unit_tests -np 4: PASSED" + +- template: run-remote-task.yml + parameters: + name: CaptureCoverage + displayName: Capture coverage data with lcov + remoteScript: | + BUILD_PREFIX=$(cat build/BUILD_PREFIX) + + GCOV_TOOL_ARG="" + if [ "${{ parameters.platform }}" = "rocm" ]; then + apt-get update -qq && apt-get install -y -qq llvm 2>/dev/null | tail -1 + GCOV_WRAPPER=$(mktemp) + printf '#!/bin/sh\nexec llvm-cov gcov "$@"\n' > "$GCOV_WRAPPER" + chmod +x "$GCOV_WRAPPER" + GCOV_TOOL_ARG="--gcov-tool ${GCOV_WRAPPER}" + fi + + lcov --version + LCOV_CAPTURE_ARGS="" + if lcov --help 2>&1 | grep -q "inconsistent"; then + LCOV_CAPTURE_ARGS="--ignore-errors inconsistent" + fi + + lcov ${GCOV_TOOL_ARG} --directory . --capture --output-file coverage.info ${LCOV_CAPTURE_ARGS} + if [ ! -s coverage.info ]; then + echo "ERROR: coverage.info was not generated." + exit 1 + fi + + lcov ${GCOV_TOOL_ARG} --extract coverage.info "${BUILD_PREFIX}/src/*" "${BUILD_PREFIX}/include/mscclpp/*" --output-file coverage.info + lcov --list coverage.info + ls -la coverage.info + +- task: Bash@3 + name: FetchCoverage + displayName: Fetch coverage data from remote VM + inputs: + targetType: 'inline' + script: | + set -e + HOSTFILE=$(System.DefaultWorkingDirectory)/test/deploy/hostfile_ci + SSH_OPTION="StrictHostKeyChecking=no" + KeyFilePath=${SSHKEYFILE_SECUREFILEPATH} + HOST=$(head -1 ${HOSTFILE}) + ssh -i ${KeyFilePath} -o ${SSH_OPTION} ${HOST} \ + 'sudo docker cp mscclpp-test:/root/mscclpp/coverage.info /tmp/coverage.info' + scp -i ${KeyFilePath} -o ${SSH_OPTION} ${HOST}:/tmp/coverage.info $(System.DefaultWorkingDirectory)/coverage.info + workingDirectory: '$(System.DefaultWorkingDirectory)' + +- task: Bash@3 + name: UploadCodecov + displayName: Upload coverage to Codecov + inputs: + targetType: 'inline' + script: | + set -e + curl -Os https://cli.codecov.io/latest/linux/codecov + chmod +x codecov + ./codecov upload-process --disable-search -t $(CODECOV_TOKEN) -f coverage.info --flag ${{ parameters.platform }}-${{ parameters.gpuArch }} + workingDirectory: '$(System.DefaultWorkingDirectory)' + +- template: stop.yml + parameters: + subscription: ${{ parameters.subscription }} + vmssName: ${{ parameters.vmssName }} diff --git a/.azure-pipelines/templates/deploy.yml b/.azure-pipelines/templates/deploy.yml new file mode 100644 index 000000000..fc116acfb --- /dev/null +++ b/.azure-pipelines/templates/deploy.yml @@ -0,0 +1,131 @@ +parameters: +- name: subscription + type: string +- name: vmssName + type: string +- name: resourceGroup + type: string + default: mscclpp +# Build parameters +- name: platform + type: string + default: 'cuda' +- name: gpuArch + type: string + default: '' +- name: buildType + type: string + default: 'Release' +- name: buildTests + type: string + default: 'true' +- name: cmakeArgs + type: string + default: '' +- name: buildName + type: string + default: 'Build' +- name: buildDisplayName + type: string + default: 'Build' +# Deploy parameters +- name: deployArgs + type: string + default: '' + +steps: +# 0. Ensure Azure CLI exists before running AzureCLI@2 tasks. +- task: Bash@3 + name: EnsureAzureCLI + displayName: Ensure Azure CLI Installed + inputs: + targetType: inline + script: | + set -e + if command -v az >/dev/null 2>&1; then + az version >/dev/null + exit 0 + fi + curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash + +# 1. Build +- task: Bash@3 + name: ${{ parameters.buildName }} + displayName: ${{ parameters.buildDisplayName }} + inputs: + targetType: 'inline' + script: | + set -e + rm -rf build + mkdir -p build && cd build + BUILD_TESTS_ARG="" + if [ "${{ parameters.buildTests }}" = "true" ]; then + BUILD_TESTS_ARG="-DMSCCLPP_BUILD_TESTS=ON" + fi + + GPU_ARCH_ARG="" + if [ -n "${{ parameters.gpuArch }}" ]; then + GPU_ARCH_ARG="-DMSCCLPP_GPU_ARCHS=${{ parameters.gpuArch }}" + fi + + CMAKE_EXTRA_ARGS='${{ parameters.cmakeArgs }}' + if [ "${{ parameters.platform }}" = "rocm" ]; then + eval CXX=/opt/rocm/bin/hipcc cmake \ + -DCMAKE_BUILD_TYPE=${{ parameters.buildType }} \ + -DMSCCLPP_BYPASS_GPU_CHECK=ON \ + -DMSCCLPP_USE_ROCM=ON \ + ${BUILD_TESTS_ARG} \ + ${GPU_ARCH_ARG} \ + ${CMAKE_EXTRA_ARGS} .. + else + eval cmake \ + -DCMAKE_BUILD_TYPE=${{ parameters.buildType }} \ + -DMSCCLPP_BYPASS_GPU_CHECK=ON \ + -DMSCCLPP_USE_CUDA=ON \ + ${BUILD_TESTS_ARG} \ + ${GPU_ARCH_ARG} \ + ${CMAKE_EXTRA_ARGS} .. + fi + make -j + cd .. + pwd > build/BUILD_PREFIX + echo "=== Build artifacts ===" + ls -la build/bin/ || echo "ERROR: build/bin/ missing after build" + du -sh build/bin/* 2>/dev/null || true + workingDirectory: '$(System.DefaultWorkingDirectory)' + +# 2. Download SSH key + install packages + start VMSS +- task: DownloadSecureFile@1 + name: SshKeyFile + displayName: Download key file + inputs: + secureFile: mscclpp.pem + +- task: Bash@3 + name: InstallPackages + displayName: Install Packages + inputs: + targetType: 'inline' + script: | + sudo apt-get update -y + sudo apt-get install pssh -y + +- task: AzureCLI@2 + name: StartVMSS + displayName: Start VMSS + inputs: + azureSubscription: ${{ parameters.subscription }} + scriptType: bash + scriptLocation: inlineScript + inlineScript: | + az vmss start --name ${{ parameters.vmssName }} --resource-group ${{ parameters.resourceGroup }} + +# 3. Deploy test environment +- task: Bash@3 + name: DeployTestEnv + displayName: Deploy Test Env + inputs: + targetType: filePath + filePath: test/deploy/deploy.sh + arguments: ${{ parameters.deployArgs }} + workingDirectory: '$(System.DefaultWorkingDirectory)' diff --git a/.azure-pipelines/templates/integration-test.yaml b/.azure-pipelines/templates/integration-test.yaml deleted file mode 100644 index 99ed6d04c..000000000 --- a/.azure-pipelines/templates/integration-test.yaml +++ /dev/null @@ -1,242 +0,0 @@ -parameters: -- name: subscription - type: string -- name: vmssName - type: string -- name: sshKeySecureFile - type: string -- name: perfBaselineFile - type: string - default: 'test/deploy/perf_ndmv4.jsonl' -- name: gpuArch - type: string - -steps: -- task: Bash@3 - name: Build - displayName: Build - inputs: - targetType: inline - script: | - mkdir build && cd build - cmake -DCMAKE_BUILD_TYPE=Release -DMSCCLPP_BYPASS_GPU_CHECK=ON -DMSCCLPP_USE_CUDA=ON -DMSCCLPP_BUILD_TESTS=ON -DMSCCLPP_GPU_ARCHS=${{ parameters.gpuArch }} .. - make -j - workingDirectory: '$(System.DefaultWorkingDirectory)' - -- task: Bash@3 - name: InstallPackages - displayName: Install Packages - inputs: - targetType: inline - script: | - sudo apt-get update -y - sudo apt-get install pssh -y - curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash - -- task: DownloadSecureFile@1 - name: SshKeyFile - displayName: Download key file - inputs: - secureFile: ${{ parameters.sshKeySecureFile }} - -- task: AzureCLI@2 - name: StartVMSS - displayName: Start VMSS - inputs: - azureSubscription: ${{ parameters.subscription }} - scriptType: bash - scriptLocation: inlineScript - inlineScript: | - az vmss start --name ${{ parameters.vmssName }} --resource-group mscclpp - -- task: Bash@3 - name: DeployTestEnv - displayName: Deploy Test Env - inputs: - targetType: filePath - filePath: test/deploy/deploy.sh - arguments: "single-node-test" - workingDirectory: '$(System.DefaultWorkingDirectory)' - -- task: Bash@3 - name: AllGatherTest - displayName: Run mscclpp AllGather test - inputs: - targetType: inline - script: | - set -e - HOSTFILE=$(System.DefaultWorkingDirectory)/test/deploy/hostfile_ci - SSH_OPTION="StrictHostKeyChecking=no" - KeyFilePath=${SSHKEYFILE_SECUREFILEPATH} - : > azureuser@10.0.0.4 - tail -f azureuser@10.0.0.4 & - CHILD_PID=$! - parallel-ssh -o . -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \ - -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c " \ - export PATH=/usr/local/mpi/bin:\$PATH; \ - export LD_LIBRARY_PATH=/root/mscclpp/build/lib:\$LD_LIBRARY_PATH; \ - cd /root/mscclpp; \ - set -e; \ - mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allgather_test_perf -b 1K -e 1G -f 2 -o output.jsonl; \ - mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allgather_test_perf -b 1K -e 1G -f 2 -k 1 -o output.jsonl; \ - mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allgather_test_perf -b 1K -e 1G -f 2 -k 2 -o output.jsonl; \ - mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allgather_test_perf -b 1K -e 1G -f 2 -k 3 -o output.jsonl"' - kill $CHILD_PID - workingDirectory: '$(System.DefaultWorkingDirectory)' - -- task: Bash@3 - name: SendRecvTest - displayName: Run mscclpp SendRecv test - inputs: - targetType: inline - script: | - set -e - HOSTFILE=$(System.DefaultWorkingDirectory)/test/deploy/hostfile_ci - SSH_OPTION="StrictHostKeyChecking=no" - KeyFilePath=${SSHKEYFILE_SECUREFILEPATH} - : > azureuser@10.0.0.4 - tail -f azureuser@10.0.0.4 & - CHILD_PID=$! - parallel-ssh -o . -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}"\ - -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\ - set -e; \ - export PATH=/usr/local/mpi/bin:\$PATH; \ - export LD_LIBRARY_PATH=/root/mscclpp/build/lib:\$LD_LIBRARY_PATH; \ - cd /root/mscclpp; \ - mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/sendrecv_test_perf -b 1K -e 1G -f 2 -o output.jsonl"' - kill $CHILD_PID - workingDirectory: '$(System.DefaultWorkingDirectory)' - -- task: Bash@3 - name: AllReduceTest - displayName: Run mscclpp AllReduce test - inputs: - targetType: 'inline' - script: | - set -e - HOSTFILE=$(System.DefaultWorkingDirectory)/test/deploy/hostfile_ci - SSH_OPTION="StrictHostKeyChecking=no" - KeyFilePath=${SSHKEYFILE_SECUREFILEPATH} - : > azureuser@10.0.0.4 - tail -f azureuser@10.0.0.4 & - CHILD_PID=$! - parallel-ssh -o . -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}"\ - -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\ - set -e; \ - export PATH=/usr/local/mpi/bin:\$PATH; \ - export LD_LIBRARY_PATH=/root/mscclpp/build/lib:\$LD_LIBRARY_PATH; \ - cd /root/mscclpp; \ - mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allreduce_test_perf -b 1K -e 1G -f 2 -o output.jsonl; \ - mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allreduce_test_perf -b 1K -e 1G -f 2 -k 1 -o output.jsonl; \ - mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allreduce_test_perf -b 1K -e 1G -f 2 -k 2 -o output.jsonl; \ - mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allreduce_test_perf -b 1K -e 1G -f 2 -k 3 -o output.jsonl; \ - mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allreduce_test_perf -b 1K -e 1G -f 2 -k 4 -o output.jsonl; \ - mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allreduce_test_perf -b 12M -e 48M -i 3145728 2 -k 5 -o output.jsonl; \ - mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allreduce_test_perf -b 24K -e 768K -i 24576 -k 6 -w 100 -n 100 -o output.jsonl"' - kill $CHILD_PID - workingDirectory: '$(System.DefaultWorkingDirectory)' - -- task: Bash@3 - name: AllToAll - displayName: Run mscclpp AllToAll test - inputs: - targetType: 'inline' - script: | - HOSTFILE=$(System.DefaultWorkingDirectory)/test/deploy/hostfile_ci - SSH_OPTION="StrictHostKeyChecking=no" - KeyFilePath=${SSHKEYFILE_SECUREFILEPATH} - : > azureuser@10.0.0.4 - tail -f azureuser@10.0.0.4 & - CHILD_PID=$! - parallel-ssh -o . -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}"\ - -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\ - set -e; \ - export PATH=/usr/local/mpi/bin:\$PATH; \ - export LD_LIBRARY_PATH=/root/mscclpp/build/lib:\$LD_LIBRARY_PATH; \ - cd /root/mscclpp; \ - mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/alltoall_test_perf -b 1K -e 1G -f 2 -o output.jsonl; \ - mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/alltoall_test_perf -b 1K -e 1G -f 2 -k 1 -o output.jsonl"' - kill $CHILD_PID - workingDirectory: '$(System.DefaultWorkingDirectory)' - -- task: Bash@3 - name: CheckPerfNumber - displayName: Check collective primitives performance - inputs: - targetType: 'inline' - script: | - set -e - HOSTFILE=$(System.DefaultWorkingDirectory)/test/deploy/hostfile_ci - SSH_OPTION="StrictHostKeyChecking=no" - KeyFilePath=${SSHKEYFILE_SECUREFILEPATH} - : > azureuser@10.0.0.4 - tail -f azureuser@10.0.0.4 & - CHILD_PID=$! - parallel-ssh -o . -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}"\ - -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\ - set -e; \ - cd /root/mscclpp; \ - export PATH=/usr/local/mpi/bin:\$PATH; \ - export LD_LIBRARY_PATH=/root/mscclpp/build/lib:\$LD_LIBRARY_PATH; \ - python3 test/mscclpp-test/check_perf_result.py --perf-file output.jsonl --baseline-file ${{ parameters.perfBaselineFile }}"' - kill $CHILD_PID - workingDirectory: '$(System.DefaultWorkingDirectory)' - -- task: Bash@3 - name: PythonAllReduceBenchmark - displayName: Python Allreduce Benchmark - inputs: - targetType: 'inline' - script: | - set -e - HOSTFILE=$(System.DefaultWorkingDirectory)/test/deploy/hostfile_ci - SSH_OPTION="StrictHostKeyChecking=no" - KeyFilePath=${SSHKEYFILE_SECUREFILEPATH} - : > azureuser@10.0.0.4 - tail -f azureuser@10.0.0.4 & - CHILD_PID=$! - parallel-ssh -o . -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \ - -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c " \ - set -e; \ - cd /root/mscclpp; \ - export PATH=/usr/local/mpi/bin:\$PATH; \ - export LD_LIBRARY_PATH=/root/mscclpp/build/lib:\$LD_LIBRARY_PATH; \ - python3 -m pip install .; \ - mpirun --allow-run-as-root -tag-output -x MSCCLPP_HOME=/root/mscclpp -np 8 python3 ./python/mscclpp_benchmark/allreduce_bench.py"' - kill $CHILD_PID - workingDirectory: '$(System.DefaultWorkingDirectory)' - -- task: Bash@3 - name: FifoPerfBenchmark - displayName: FIFO Performance Benchmark - inputs: - targetType: 'inline' - script: | - set -e - HOSTFILE=$(System.DefaultWorkingDirectory)/test/deploy/hostfile_ci - SSH_OPTION="StrictHostKeyChecking=no" - KeyFilePath=${SSHKEYFILE_SECUREFILEPATH} - : > azureuser@10.0.0.4 - tail -f azureuser@10.0.0.4 & - CHILD_PID=$! - parallel-ssh -o . -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}"\ - -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\ - set -e; \ - export PATH=/usr/local/mpi/bin:\$PATH; \ - export LD_LIBRARY_PATH=/root/mscclpp/build/lib:\$LD_LIBRARY_PATH; \ - cd /root/mscclpp; \ - ./build/bin/perf/fifo_test"' - kill $CHILD_PID - workingDirectory: '$(System.DefaultWorkingDirectory)' - - -- task: AzureCLI@2 - name: StopVMSS - displayName: Deallocate VMSS - condition: always() - inputs: - azureSubscription: ${{ parameters.subscription }} - scriptType: bash - scriptLocation: inlineScript - inlineScript: | - az vmss deallocate --name ${{ parameters.vmssName }} --resource-group mscclpp \ No newline at end of file diff --git a/.azure-pipelines/templates/integration-test.yml b/.azure-pipelines/templates/integration-test.yml new file mode 100644 index 000000000..b686e4f21 --- /dev/null +++ b/.azure-pipelines/templates/integration-test.yml @@ -0,0 +1,76 @@ +parameters: +- name: subscription + type: string +- name: vmssName + type: string +- name: perfBaselineFile + type: string + default: 'test/deploy/perf_ndmv4.jsonl' +- name: gpuArch + type: string + +steps: +- template: deploy.yml + parameters: + subscription: ${{ parameters.subscription }} + vmssName: ${{ parameters.vmssName }} + gpuArch: ${{ parameters.gpuArch }} + deployArgs: 'single-node-test' + +- template: run-remote-task.yml + parameters: + name: AllGatherTest + displayName: Run mscclpp AllGather test + remoteScript: | + mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allgather_test_perf -b 1K -e 1G -f 2 -o output.jsonl + mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allgather_test_perf -b 1K -e 1G -f 2 -k 1 -o output.jsonl + mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allgather_test_perf -b 1K -e 1G -f 2 -k 2 -o output.jsonl + mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allgather_test_perf -b 1K -e 1G -f 2 -k 3 -o output.jsonl + +- template: run-remote-task.yml + parameters: + name: SendRecvTest + displayName: Run mscclpp SendRecv test + remoteScript: | + mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/sendrecv_test_perf -b 1K -e 1G -f 2 -o output.jsonl + +- template: run-remote-task.yml + parameters: + name: AllReduceTest + displayName: Run mscclpp AllReduce test + remoteScript: | + mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allreduce_test_perf -b 1K -e 1G -f 2 -o output.jsonl + mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allreduce_test_perf -b 1K -e 1G -f 2 -k 1 -o output.jsonl + mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allreduce_test_perf -b 1K -e 1G -f 2 -k 2 -o output.jsonl + mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allreduce_test_perf -b 1K -e 1G -f 2 -k 3 -o output.jsonl + mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allreduce_test_perf -b 1K -e 1G -f 2 -k 4 -o output.jsonl + mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allreduce_test_perf -b 12M -e 48M -i 3145728 2 -k 5 -o output.jsonl + mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allreduce_test_perf -b 24K -e 768K -i 24576 -k 6 -w 100 -n 100 -o output.jsonl + +- template: run-remote-task.yml + parameters: + name: AllToAll + displayName: Run mscclpp AllToAll test + remoteScript: | + mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/alltoall_test_perf -b 1K -e 1G -f 2 -o output.jsonl + mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/alltoall_test_perf -b 1K -e 1G -f 2 -k 1 -o output.jsonl + +- template: run-remote-task.yml + parameters: + name: CheckPerfNumber + displayName: Check collective primitives performance + remoteScript: | + python3 test/mscclpp-test/check_perf_result.py --perf-file output.jsonl --baseline-file ${{ parameters.perfBaselineFile }} + +- template: run-remote-task.yml + parameters: + name: PythonAllReduceBenchmark + displayName: Python Allreduce Benchmark + remoteScript: | + python3 -m pip install . + mpirun --allow-run-as-root -tag-output -x MSCCLPP_HOME=/root/mscclpp -np 8 python3 ./python/mscclpp_benchmark/allreduce_bench.py + +- template: stop.yml + parameters: + subscription: ${{ parameters.subscription }} + vmssName: ${{ parameters.vmssName }} \ No newline at end of file diff --git a/.azure-pipelines/templates/nccl-test.yaml b/.azure-pipelines/templates/nccl-test.yaml deleted file mode 100644 index ef4a9fa8f..000000000 --- a/.azure-pipelines/templates/nccl-test.yaml +++ /dev/null @@ -1,282 +0,0 @@ -# .azure-pipelines/templates/nccl-test.yaml -# ---------------------------------------- -# A step‐template that runs the entire MSCCLPP→NCCL test suite on one pool/container. -# -# Parameters: -# subscription – Azure subscription to use for VMSS start/stop -# sshKeySecureFile – the secureFile name for your SSH key - -parameters: -- name: subscription - type: string -- name: vmssName - type: string -- name: sshKeySecureFile - type: string -- name: nvccGencode - type: string - default: "-gencode=arch=compute_80,code=sm_80" - -steps: -- checkout: self -- checkout: git://One/msccl-users -- task: Bash@3 - name: Build - displayName: Build - inputs: - targetType: 'inline' - script: | - mkdir build && cd build - cmake -DCMAKE_BUILD_TYPE=Release -DMSCCLPP_BYPASS_GPU_CHECK=ON -DMSCCLPP_USE_CUDA=ON -DMSCCLPP_BUILD_TESTS=ON .. - make -j - workingDirectory: '$(System.DefaultWorkingDirectory)/mscclpp' - -- task: DownloadSecureFile@1 - name: SshKeyFile - displayName: Download key file - inputs: - secureFile: ${{ parameters.sshKeySecureFile }} - -- task: Bash@3 - name: InstallPackages - displayName: Install Packages - inputs: - targetType: 'inline' - script: | - sudo apt-get update -y - sudo apt-get install pssh -y - curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash -- task: AzureCLI@2 - name: StartVMSS - displayName: Start VMSS - inputs: - azureSubscription: ${{ parameters.subscription }} - scriptType: bash - scriptLocation: inlineScript - inlineScript: | - az vmss start --name ${{ parameters.vmssName }} --resource-group mscclpp -- task: Bash@3 - name: DeployTestEnv - displayName: Deploy Test Env - inputs: - targetType: filePath - filePath: mscclpp/test/deploy/deploy.sh - arguments: nccltest-single-node - workingDirectory: $(System.DefaultWorkingDirectory)/mscclpp - -- task: Bash@3 - name: CopyMscclUsers - displayName: Copy msccl-users - inputs: - targetType: inline - script: | - set -e - HOSTFILE=$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci - ROOT_DIR=$(System.DefaultWorkingDirectory)/msccl-users - SSH_OPTION="StrictHostKeyChecking=no" - KeyFilePath=${SSHKEYFILE_SECUREFILEPATH} - DST_DIR="/tmp/mscclpp/msccl-users" - parallel-scp -t 0 -r -h ${HOSTFILE} -x "-i ${KeyFilePath}" -O $SSH_OPTION ${ROOT_DIR} ${DST_DIR} - workingDirectory: '$(System.DefaultWorkingDirectory)' - -# - task: Bash@3 -# name: GenerateExecutionFile -# displayName: Generate execution file -# inputs: -# targetType: 'inline' -# script: | -# set -e -# HOSTFILE=$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci -# ROOT_DIR=$(System.DefaultWorkingDirectory)/mscclpp -# SSH_OPTION="StrictHostKeyChecking=no" -# KeyFilePath=${SSHKEYFILE_SECUREFILEPATH} -# parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \ -# -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\ -# cd /root/mscclpp/msccl-users; \ -# mkdir -p execution-files; \ -# cd /root/mscclpp/msccl-users; \ -# bash algos/mscclpp_a100/generate_execution_plan.sh"' -# workingDirectory: '$(System.DefaultWorkingDirectory)' - -- task: Bash@3 - name: InstallNcclTests - displayName: Install NCCL Tests - inputs: - targetType: inline - script: | - set -e - HOSTFILE=$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci - ROOT_DIR=$(System.DefaultWorkingDirectory)/mscclpp - SSH_OPTION="StrictHostKeyChecking=no" - KeyFilePath=${SSHKEYFILE_SECUREFILEPATH} - parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \ - -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c " \ - cd; git clone https://github.com/NVIDIA/nccl-tests.git; \ - cd nccl-tests; \ - MPI=1 MPI_HOME=/usr/local/mpi make -j"' - workingDirectory: '$(System.DefaultWorkingDirectory)' - -# - task: Bash@3 -# name: RunNcclAllReduceTest -# displayName: Run NCCL AllReduce Test -# inputs: -# targetType: inline -# script: | -# set -e -# HOSTFILE=$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci -# ROOT_DIR=$(System.DefaultWorkingDirectory)/mscclpp -# SSH_OPTION="StrictHostKeyChecking=no" -# KeyFilePath=${SSHKEYFILE_SECUREFILEPATH} -# parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \ -# -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\ -# cd /root/mscclpp; \ -# mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_EXECUTION_PLAN_DIR=/root/mscclpp/msccl-users/execution-files /root/nccl-tests/build/all_reduce_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20"' -# workingDirectory: '$(System.DefaultWorkingDirectory)' - -# - task: Bash@3 -# name: RunNcclAllGatherTest -# displayName: Run NCCL AllGather Test -# inputs: -# targetType: inline -# script: | -# set -e -# HOSTFILE=$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci -# ROOT_DIR=$(System.DefaultWorkingDirectory)/mscclpp -# SSH_OPTION="StrictHostKeyChecking=no" -# KeyFilePath=${SSHKEYFILE_SECUREFILEPATH} -# parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \ -# -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\ -# cd /root/mscclpp; \ -# mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_EXECUTION_PLAN_DIR=/root/mscclpp/msccl-users/execution-files /root/nccl-tests/build/all_gather_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20"' -# workingDirectory: '$(System.DefaultWorkingDirectory)' - -# - task: Bash@3 -# name: RunNcclReduceScatterTest -# displayName: Run NCCL Reduce Scatter Test -# inputs: -# targetType: inline -# script: | -# set -e -# HOSTFILE=$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci -# ROOT_DIR=$(System.DefaultWorkingDirectory)/mscclpp -# SSH_OPTION="StrictHostKeyChecking=no" -# KeyFilePath=${SSHKEYFILE_SECUREFILEPATH} -# parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \ -# -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\ -# cd /root/mscclpp; \ -# mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_EXECUTION_PLAN_DIR=/root/mscclpp/msccl-users/execution-files /root/nccl-tests/build/reduce_scatter_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20"' -# workingDirectory: '$(System.DefaultWorkingDirectory)' - -- task: Bash@3 - name: InstallNccl - displayName: Install NCCL - inputs: - targetType: inline - script: | - set -e - HOSTFILE=$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci - ROOT_DIR=$(System.DefaultWorkingDirectory)/mscclpp - SSH_OPTION="StrictHostKeyChecking=no" - KeyFilePath=${SSHKEYFILE_SECUREFILEPATH} - parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \ - -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c " \ - LATEST_TAG=\$(curl -fsSL https://api.github.com/repos/NVIDIA/nccl/releases/latest | grep tag_name | cut -d\\\" -f4); \ - if [ -z \"\$LATEST_TAG\" ]; then echo \"Failed to fetch latest NCCL tag\"; exit 1; fi; \ - cd; git clone --branch \$LATEST_TAG --depth 1 https://github.com/NVIDIA/nccl.git; \ - cd nccl; \ - make -j src.build NVCC_GENCODE=${{ parameters.nvccGencode }}"' - workingDirectory: '$(System.DefaultWorkingDirectory)' - -- task: Bash@3 - name: RunNcclAllGatherFallbaclkToNcclTest - displayName: Run NCCL AllGather Test with or without Fallback to NCCL operation - inputs: - targetType: inline - script: | - set -e - HOSTFILE=$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci - ROOT_DIR=$(System.DefaultWorkingDirectory)/mscclpp - SSH_OPTION="StrictHostKeyChecking=no" - KeyFilePath=${SSHKEYFILE_SECUREFILEPATH} - parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \ - -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\ - export LD_LIBRARY_PATH=/root/mscclpp/build/lib:$LD_LIBRARY_PATH; \ - cd /root/mscclpp; \ - echo \"mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x MSCCLPP_NCCL_SYMMETRIC_MEMORY=1 -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION=\"allgather\" /root/nccl-tests/build/all_gather_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20\";\ - mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x MSCCLPP_NCCL_SYMMETRIC_MEMORY=1 -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION="allgather" /root/nccl-tests/build/all_gather_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20; \ - echo \"mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x MSCCLPP_NCCL_SYMMETRIC_MEMORY=1 -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION=\"allreduce\" /root/nccl-tests/build/all_gather_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20\";\ - mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x MSCCLPP_NCCL_SYMMETRIC_MEMORY=1 -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION="allreduce" /root/nccl-tests/build/all_gather_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20"' - workingDirectory: '$(System.DefaultWorkingDirectory)' - -- task: Bash@3 - name: RunNcclAllReduceFallbaclkToNcclTest - displayName: Run NCCL AllReduce Test with or without Fallback to NCCL operation - inputs: - targetType: 'inline' - script: | - set -e - HOSTFILE=$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci - ROOT_DIR=$(System.DefaultWorkingDirectory)/mscclpp - SSH_OPTION="StrictHostKeyChecking=no" - KeyFilePath=${SSHKEYFILE_SECUREFILEPATH} - parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \ - -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\ - export LD_LIBRARY_PATH=/root/mscclpp/build/lib:$LD_LIBRARY_PATH; \ - cd /root/mscclpp; \ - echo \"mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x MSCCLPP_NCCL_SYMMETRIC_MEMORY=1 -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION=\"allreduce\" /root/nccl-tests/build/all_reduce_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20\";\ - mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x MSCCLPP_NCCL_SYMMETRIC_MEMORY=1 -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION="allreduce" /root/nccl-tests/build/all_reduce_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20; \ - echo \"mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x MSCCLPP_NCCL_SYMMETRIC_MEMORY=1 -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION=\"allgather\" /root/nccl-tests/build/all_reduce_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20\";\ - mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x MSCCLPP_NCCL_SYMMETRIC_MEMORY=1 -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION="allgather" /root/nccl-tests/build/all_reduce_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20"' - workingDirectory: '$(System.DefaultWorkingDirectory)' - -- task: Bash@3 - name: RunNcclBroadcastFallbaclkToNcclTest - displayName: Run NCCL Broadcast Test with or without Fallback to NCCL operation - inputs: - targetType: 'inline' - script: | - set -e - HOSTFILE=$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci - ROOT_DIR=$(System.DefaultWorkingDirectory)/mscclpp - SSH_OPTION="StrictHostKeyChecking=no" - KeyFilePath=${SSHKEYFILE_SECUREFILEPATH} - parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \ - -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\ - export LD_LIBRARY_PATH=/root/mscclpp/build/lib:$LD_LIBRARY_PATH; \ - cd /root/mscclpp; \ - echo \"mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x MSCCLPP_NCCL_SYMMETRIC_MEMORY=1 -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION=\"broadcast\" /root/nccl-tests/build/broadcast_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20\";\ - mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x MSCCLPP_NCCL_SYMMETRIC_MEMORY=1 -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION="broadcast" /root/nccl-tests/build/broadcast_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20; \ - echo \"mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x MSCCLPP_NCCL_SYMMETRIC_MEMORY=1 -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION=\"allreduce\" /root/nccl-tests/build/broadcast_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20\";\ - mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x MSCCLPP_NCCL_SYMMETRIC_MEMORY=1 -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION="allreduce" /root/nccl-tests/build/broadcast_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20"' - workingDirectory: '$(System.DefaultWorkingDirectory)' - -# - task: Bash@3 -# name: RunNcclReduceScatterFallbaclkToNcclTest -# displayName: Run NCCL ReduceScatter Test with or without Fallback to NCCL operation -# inputs: -# targetType: 'inline' -# script: | -# set -e -# HOSTFILE=$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci -# ROOT_DIR=$(System.DefaultWorkingDirectory)/mscclpp -# SSH_OPTION="StrictHostKeyChecking=no" -# KeyFilePath=${SSHKEYFILE_SECUREFILEPATH} -# parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \ -# -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\ -# cd /root/mscclpp; \ -# echo \"mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION=\"reducescatter\" /root/nccl-tests/build/reduce_scatter_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20\"; \ -# mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION="reducescatter" /root/nccl-tests/build/reduce_scatter_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20; \ -# echo \"mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION=\"broadcast\" -x MSCCLPP_EXECUTION_PLAN_DIR=/root/mscclpp/msccl-users/execution-files /root/nccl-tests/build/reduce_scatter_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20\"; \ -# mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION="broadcast" -x MSCCLPP_EXECUTION_PLAN_DIR=/root/mscclpp/msccl-users/execution-files /root/nccl-tests/build/reduce_scatter_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20"' -# workingDirectory: '$(System.DefaultWorkingDirectory)' - -- task: AzureCLI@2 - name: StopVMSS - displayName: Deallocate VMSS - condition: always() - inputs: - azureSubscription: ${{ parameters.subscription }} - scriptType: bash - scriptLocation: inlineScript - inlineScript: | - az vmss deallocate --name ${{ parameters.vmssName }} --resource-group mscclpp diff --git a/.azure-pipelines/templates/nccl-test.yml b/.azure-pipelines/templates/nccl-test.yml new file mode 100644 index 000000000..211e2393a --- /dev/null +++ b/.azure-pipelines/templates/nccl-test.yml @@ -0,0 +1,76 @@ +# .azure-pipelines/templates/nccl-test.yml +# ---------------------------------------- +# A step‐template that runs the entire MSCCLPP→NCCL test suite on one pool/container. +# +# Parameters: +# subscription – Azure subscription to use for VMSS start/stop + +parameters: +- name: subscription + type: string +- name: vmssName + type: string +- name: nvccGencode + type: string + default: "-gencode=arch=compute_80,code=sm_80" + +steps: +- template: deploy.yml + parameters: + subscription: ${{ parameters.subscription }} + vmssName: ${{ parameters.vmssName }} + deployArgs: 'nccltest-single-node' + +- template: run-remote-task.yml + parameters: + name: InstallNcclTests + displayName: Install NCCL Tests + remoteScript: | + cd + git clone https://github.com/NVIDIA/nccl-tests.git + cd nccl-tests + MPI=1 MPI_HOME=/usr/local/mpi make -j + +- template: run-remote-task.yml + parameters: + name: InstallNccl + displayName: Install NCCL + remoteScript: | + LATEST_TAG=$(curl -fsSL https://api.github.com/repos/NVIDIA/nccl/releases/latest | grep tag_name | cut -d\" -f4) + if [ -z "$LATEST_TAG" ]; then + echo "Failed to fetch latest NCCL tag" + exit 1 + fi + cd + git clone --branch $LATEST_TAG --depth 1 https://github.com/NVIDIA/nccl.git + cd nccl + make -j src.build NVCC_GENCODE=${{ parameters.nvccGencode }} + +- template: run-remote-task.yml + parameters: + name: RunNcclAllGatherFallbaclkToNcclTest + displayName: Run NCCL AllGather Test with or without Fallback to NCCL operation + remoteScript: | + mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x MSCCLPP_NCCL_SYMMETRIC_MEMORY=1 -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION="allgather" /root/nccl-tests/build/all_gather_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20 + mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x MSCCLPP_NCCL_SYMMETRIC_MEMORY=1 -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION="allreduce" /root/nccl-tests/build/all_gather_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20 + +- template: run-remote-task.yml + parameters: + name: RunNcclAllReduceFallbaclkToNcclTest + displayName: Run NCCL AllReduce Test with or without Fallback to NCCL operation + remoteScript: | + mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x MSCCLPP_NCCL_SYMMETRIC_MEMORY=1 -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION="allreduce" /root/nccl-tests/build/all_reduce_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20 + mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x MSCCLPP_NCCL_SYMMETRIC_MEMORY=1 -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION="allgather" /root/nccl-tests/build/all_reduce_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20 + +- template: run-remote-task.yml + parameters: + name: RunNcclBroadcastFallbaclkToNcclTest + displayName: Run NCCL Broadcast Test with or without Fallback to NCCL operation + remoteScript: | + mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x MSCCLPP_NCCL_SYMMETRIC_MEMORY=1 -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION="broadcast" /root/nccl-tests/build/broadcast_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20 + mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x MSCCLPP_NCCL_SYMMETRIC_MEMORY=1 -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION="allreduce" /root/nccl-tests/build/broadcast_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20 + +- template: stop.yml + parameters: + subscription: ${{ parameters.subscription }} + vmssName: ${{ parameters.vmssName }} diff --git a/.azure-pipelines/templates/rccl-test.yaml b/.azure-pipelines/templates/rccl-test.yaml deleted file mode 100644 index 040605dfd..000000000 --- a/.azure-pipelines/templates/rccl-test.yaml +++ /dev/null @@ -1,142 +0,0 @@ -# .azure-pipelines/templates/rccl-test.yaml -# ------------------------------------------------ -# A step-template that runs the entire MSCCLPP→RCCL test suite on one pool/container. -# -# Parameters: -# subscription – Azure subscription to use for VMSS start/stop -# vmssName – VMSS name to start/stop -# sshKeySecureFile – the secureFile name for your SSH key -# gpuArch – GPU architecture (e.g. gfx942) - -parameters: -- name: subscription - type: string -- name: vmssName - type: string -- name: sshKeySecureFile - type: string -- name: gpuArch - type: string - default: "gfx942" - -steps: -- task: Bash@3 - name: Build - displayName: Build - inputs: - targetType: 'inline' - script: | - mkdir build && cd build - CXX=/opt/rocm/bin/hipcc cmake -DCMAKE_BUILD_TYPE=Release -DMSCCLPP_BYPASS_GPU_CHECK=ON -DMSCCLPP_USE_ROCM=ON -DMSCCLPP_GPU_ARCHS=${{ parameters.gpuArch }} .. - make -j - workingDirectory: '$(System.DefaultWorkingDirectory)' - -- task: DownloadSecureFile@1 - name: SshKeyFile - displayName: Download key file - inputs: - secureFile: ${{ parameters.sshKeySecureFile }} - -- task: Bash@3 - name: InstallPackages - displayName: Install Packages - inputs: - targetType: 'inline' - script: | - sudo apt-get update -y - sudo apt-get install pssh -y - curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash -- task: AzureCLI@2 - name: StartVMSS - displayName: Start VMSS - inputs: - azureSubscription: ${{ parameters.subscription }} - scriptType: bash - scriptLocation: inlineScript - inlineScript: | - az vmss start --name ${{ parameters.vmssName }} --resource-group mscclpp -- task: Bash@3 - name: DeployTestEnv - displayName: Deploy Test Env - inputs: - targetType: filePath - filePath: test/deploy/deploy.sh - arguments: "single-node-test true rocm" - workingDirectory: $(System.DefaultWorkingDirectory) - - -- task: Bash@3 - name: InstallRcclTests - displayName: Install RCCL Tests - inputs: - targetType: inline - script: | - set -e - HOSTFILE=$(System.DefaultWorkingDirectory)/test/deploy/hostfile_ci - ROOT_DIR=$(System.DefaultWorkingDirectory) - SSH_OPTION="StrictHostKeyChecking=no" - KeyFilePath=${SSHKEYFILE_SECUREFILEPATH} - parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \ - -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c " \ - cd; \ - git clone --filter=blob:none --no-checkout https://github.com/ROCm/rocm-systems.git; \ - cd rocm-systems; \ - git sparse-checkout init --cone; \ - git sparse-checkout set projects/rccl-tests; \ - git checkout; \ - cd projects/rccl-tests; \ - MPI=1 MPI_HOME=/usr/local/mpi make -j"' - workingDirectory: '$(System.DefaultWorkingDirectory)' - -- task: Bash@3 - name: RunRcclAllGatherTest - displayName: Run RCCL AllGather Test with or without MSCCLPP Lib - inputs: - targetType: inline - script: | - set -e - HOSTFILE=$(System.DefaultWorkingDirectory)/test/deploy/hostfile_ci - ROOT_DIR=$(System.DefaultWorkingDirectory) - SSH_OPTION="StrictHostKeyChecking=no" - KeyFilePath=${SSHKEYFILE_SECUREFILEPATH} - parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \ - -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\ - export LD_LIBRARY_PATH=/root/mscclpp/build/lib:\$LD_LIBRARY_PATH; \ - cd /root/mscclpp; \ - echo \"mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x MSCCLPP_NCCL_SYMMETRIC_MEMORY=1 -x NCCL_DEBUG=WARN /root/rocm-systems/projects/rccl-tests/build/all_gather_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20\";\ - mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x MSCCLPP_NCCL_SYMMETRIC_MEMORY=1 -x NCCL_DEBUG=WARN /root/rocm-systems/projects/rccl-tests/build/all_gather_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20; \ - echo \"mpirun -np 8 --bind-to numa --allow-run-as-root /root/rocm-systems/projects/rccl-tests/build/all_gather_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20\";\ - mpirun -np 8 --bind-to numa --allow-run-as-root /root/rocm-systems/projects/rccl-tests/build/all_gather_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20"' - workingDirectory: '$(System.DefaultWorkingDirectory)' - -- task: Bash@3 - name: RunRcclAllReduceTest - displayName: Run RCCL AllReduce Test with or without MSCCLPP Lib - inputs: - targetType: 'inline' - script: | - set -e - HOSTFILE=$(System.DefaultWorkingDirectory)/test/deploy/hostfile_ci - ROOT_DIR=$(System.DefaultWorkingDirectory) - SSH_OPTION="StrictHostKeyChecking=no" - KeyFilePath=${SSHKEYFILE_SECUREFILEPATH} - parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \ - -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\ - export LD_LIBRARY_PATH=/root/mscclpp/build/lib:\$LD_LIBRARY_PATH; \ - cd /root/mscclpp; \ - echo \"mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x MSCCLPP_NCCL_SYMMETRIC_MEMORY=1 -x NCCL_DEBUG=WARN /root/rocm-systems/projects/rccl-tests/build/all_reduce_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20\";\ - mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x MSCCLPP_NCCL_SYMMETRIC_MEMORY=1 -x NCCL_DEBUG=WARN /root/rocm-systems/projects/rccl-tests/build/all_reduce_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20; \ - echo \"mpirun -np 8 --bind-to numa --allow-run-as-root /root/rocm-systems/projects/rccl-tests/build/all_reduce_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20\";\ - mpirun -np 8 --bind-to numa --allow-run-as-root /root/rocm-systems/projects/rccl-tests/build/all_reduce_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20"' - workingDirectory: '$(System.DefaultWorkingDirectory)' - -- task: AzureCLI@2 - name: StopVMSS - displayName: Deallocate VMSS - condition: always() - inputs: - azureSubscription: ${{ parameters.subscription }} - scriptType: bash - scriptLocation: inlineScript - inlineScript: | - az vmss deallocate --name ${{ parameters.vmssName }} --resource-group mscclpp diff --git a/.azure-pipelines/templates/rccl-test.yml b/.azure-pipelines/templates/rccl-test.yml new file mode 100644 index 000000000..8e2471614 --- /dev/null +++ b/.azure-pipelines/templates/rccl-test.yml @@ -0,0 +1,63 @@ +# .azure-pipelines/templates/rccl-test.yml +# ------------------------------------------------ +# A step-template that runs the entire MSCCLPP→RCCL test suite on one pool/container. +# +# Parameters: +# subscription – Azure subscription to use for VMSS start/stop +# vmssName – VMSS name to start/stop +# gpuArch – GPU architecture (e.g. gfx942) + +parameters: +- name: subscription + type: string +- name: vmssName + type: string +- name: gpuArch + type: string + default: "gfx942" + +steps: +- template: deploy.yml + parameters: + subscription: ${{ parameters.subscription }} + vmssName: ${{ parameters.vmssName }} + platform: rocm + gpuArch: ${{ parameters.gpuArch }} + buildTests: false + deployArgs: 'single-node-test true rocm' + + +- template: run-remote-task.yml + parameters: + name: InstallRcclTests + displayName: Install RCCL Tests + remoteScript: | + cd + git clone --filter=blob:none --no-checkout https://github.com/ROCm/rocm-systems.git + cd rocm-systems + git sparse-checkout init --cone + git sparse-checkout set projects/rccl-tests + git checkout + cd projects/rccl-tests + MPI=1 MPI_HOME=/usr/local/mpi make -j + +- template: run-remote-task.yml + parameters: + name: RunRcclAllGatherTest + displayName: Run RCCL AllGather Test with or without MSCCLPP Lib + remoteScript: | + mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x MSCCLPP_NCCL_SYMMETRIC_MEMORY=1 -x NCCL_DEBUG=WARN /root/rocm-systems/projects/rccl-tests/build/all_gather_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20 + mpirun -np 8 --bind-to numa --allow-run-as-root /root/rocm-systems/projects/rccl-tests/build/all_gather_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20 + +- template: run-remote-task.yml + parameters: + name: RunRcclAllReduceTest + displayName: Run RCCL AllReduce Test with or without MSCCLPP Lib + remoteScript: | + mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/lib/libmscclpp_nccl.so -x MSCCLPP_NCCL_SYMMETRIC_MEMORY=1 -x NCCL_DEBUG=WARN /root/rocm-systems/projects/rccl-tests/build/all_reduce_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20 + mpirun -np 8 --bind-to numa --allow-run-as-root /root/rocm-systems/projects/rccl-tests/build/all_reduce_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20 + +- template: stop.yml + parameters: + subscription: ${{ parameters.subscription }} + vmssName: ${{ parameters.vmssName }} diff --git a/.azure-pipelines/templates/run-remote-task.yml b/.azure-pipelines/templates/run-remote-task.yml new file mode 100644 index 000000000..37b3a7d7e --- /dev/null +++ b/.azure-pipelines/templates/run-remote-task.yml @@ -0,0 +1,27 @@ +parameters: +- name: name + type: string + default: '' +- name: displayName + type: string +- name: runRemoteArgs + type: string + default: '' +- name: remoteScript + type: string +- name: workingDirectory + type: string + default: '$(System.DefaultWorkingDirectory)' + +steps: +- task: Bash@3 + ${{ if ne(parameters.name, '') }}: + name: ${{ parameters.name }} + displayName: ${{ parameters.displayName }} + inputs: + targetType: 'inline' + script: | + test/deploy/run-remote.sh ${{ parameters.runRemoteArgs }} <<'REMOTE_CMD' + ${{ parameters.remoteScript }} + REMOTE_CMD + workingDirectory: ${{ parameters.workingDirectory }} diff --git a/.azure-pipelines/templates/stop.yml b/.azure-pipelines/templates/stop.yml new file mode 100644 index 000000000..40498c290 --- /dev/null +++ b/.azure-pipelines/templates/stop.yml @@ -0,0 +1,20 @@ +parameters: +- name: subscription + type: string +- name: vmssName + type: string +- name: resourceGroup + type: string + default: mscclpp + +steps: +- task: AzureCLI@2 + name: StopVMSS + displayName: Deallocate VMSS + condition: always() + inputs: + azureSubscription: ${{ parameters.subscription }} + scriptType: bash + scriptLocation: inlineScript + inlineScript: | + az vmss deallocate --name ${{ parameters.vmssName }} --resource-group ${{ parameters.resourceGroup }} diff --git a/.azure-pipelines/templates/ut-no-ib-env.yaml b/.azure-pipelines/templates/ut-no-ib-env.yaml deleted file mode 100644 index 0d97f9fc9..000000000 --- a/.azure-pipelines/templates/ut-no-ib-env.yaml +++ /dev/null @@ -1,191 +0,0 @@ -parameters: -- name: subscription - type: string -- name: vmssName - type: string -- name: sshKeySecureFile - type: string -- name: gpuArch - type: string - -steps: -- task: Bash@3 - name: Build - displayName: Build - inputs: - targetType: 'inline' - script: | - mkdir build && cd build - cmake -DCMAKE_BUILD_TYPE=Release -DMSCCLPP_BYPASS_GPU_CHECK=ON -DMSCCLPP_USE_CUDA=ON -DMSCCLPP_BUILD_TESTS=ON -DMSCCLPP_USE_IB=OFF -DMSCCLPP_GPU_ARCHS=${{ parameters.gpuArch }} .. - make -j - workingDirectory: '$(System.DefaultWorkingDirectory)' - -- task: DownloadSecureFile@1 - name: SshKeyFile - displayName: Download key file - inputs: - secureFile: ${{ parameters.sshKeySecureFile }} - -- task: Bash@3 - name: InstallPackages - displayName: Install Packages - inputs: - targetType: 'inline' - script: | - sudo apt-get update -y - sudo apt-get install pssh -y - curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash - -- task: AzureCLI@2 - name: StartVMSS - displayName: Start VMSS - inputs: - azureSubscription: ${{ parameters.subscription }} - scriptType: bash - scriptLocation: inlineScript - inlineScript: | - az vmss start --name ${{ parameters.vmssName }} --resource-group mscclpp - -- task: Bash@3 - name: DeployTestEnv - displayName: Deploy Test Env - inputs: - targetType: filePath - filePath: test/deploy/deploy.sh - arguments: single-node-test false - workingDirectory: $(System.DefaultWorkingDirectory) - -- task: Bash@3 - name: UnitTests - displayName: Run mscclpp unit tests - inputs: - targetType: inline - script: | - set -e - HOSTFILE=$(System.DefaultWorkingDirectory)/test/deploy/hostfile_ci - SSH_OPTION="StrictHostKeyChecking=no" - KeyFilePath=${SSHKEYFILE_SECUREFILEPATH} - : > azureuser@10.0.0.4 - tail -f azureuser@10.0.0.4 & - CHILD_PID=$! - parallel-ssh -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" -o . \ - -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c " \ - cd /root/mscclpp; \ - export LD_LIBRARY_PATH=/root/mscclpp/build/lib:\$LD_LIBRARY_PATH; \ - ./build/bin/unit_tests"' - kill $CHILD_PID - workingDirectory: '$(System.DefaultWorkingDirectory)' - -- task: Bash@3 - name: MpUnitTests - displayName: Run mscclpp multi-process unit tests - inputs: - targetType: 'inline' - script: | - set -e - HOSTFILE=$(System.DefaultWorkingDirectory)/test/deploy/hostfile_ci - SSH_OPTION="StrictHostKeyChecking=no" - KeyFilePath=${SSHKEYFILE_SECUREFILEPATH} - : > azureuser@10.0.0.4 - tail -f azureuser@10.0.0.4 & - CHILD_PID=$! - parallel-ssh -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" -o . \ - -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c " \ - export PATH=/usr/local/mpi/bin:\$PATH; \ - cd /root/mscclpp; \ - export LD_LIBRARY_PATH=/root/mscclpp/build/lib:\$LD_LIBRARY_PATH; \ - mpirun --allow-run-as-root -tag-output -np 2 ./build/bin/mp_unit_tests; \ - mpirun --allow-run-as-root -tag-output -np 4 ./build/bin/mp_unit_tests; \ - mpirun --allow-run-as-root -tag-output -np 8 ./build/bin/mp_unit_tests"' - kill $CHILD_PID - workingDirectory: '$(System.DefaultWorkingDirectory)' - -- task: Bash@3 - name: PyTests - displayName: Run pytests - inputs: - targetType: inline - script: | - set -e - HOSTFILE=$(System.DefaultWorkingDirectory)/test/deploy/hostfile_ci - SSH_OPTION="StrictHostKeyChecking=no" - KeyFilePath=${SSHKEYFILE_SECUREFILEPATH} - : > azureuser@10.0.0.4 - tail -f azureuser@10.0.0.4 & - CHILD_PID=$! - parallel-ssh -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" -o . \ - -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c " \ - export PATH=/usr/local/mpi/bin:\$PATH \ - export LD_LIBRARY_PATH=/root/mscclpp/build/lib:\$LD_LIBRARY_PATH; \ - cd /root/mscclpp; \ - mpirun --allow-run-as-root -tag-output -x MSCCLPP_HOME=/root/mscclpp -x MSCCLPP_DISABLE_IB_TESTS=1 -np 8 python3 -m pytest ./python/test/test_mscclpp.py -x"' - kill $CHILD_PID - workingDirectory: '$(System.DefaultWorkingDirectory)' - -- task: Bash@3 - name: StopContainer - displayName: Stop existing container - inputs: - targetType: 'inline' - script: | - set -e - HOSTFILE=$(System.DefaultWorkingDirectory)/test/deploy/hostfile_ci - SSH_OPTION="StrictHostKeyChecking=no" - KeyFilePath=${SSHKEYFILE_SECUREFILEPATH} - parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" -O $SSH_OPTION \ - "sudo docker stop mscclpp-test || true; sudo docker rm mscclpp-test || true" - rm -f $(System.DefaultWorkingDirectory)/sshkey $(System.DefaultWorkingDirectory)/sshkey.pub - workingDirectory: '$(System.DefaultWorkingDirectory)' - -- task: Bash@3 - name: BuildWithIb - displayName: Rebuild with IB - inputs: - targetType: 'inline' - script: | - rm -rf build && mkdir build && cd build - cmake -DCMAKE_BUILD_TYPE=Release -DMSCCLPP_BYPASS_GPU_CHECK=ON -DMSCCLPP_USE_CUDA=ON -DMSCCLPP_BUILD_TESTS=ON -DMSCCLPP_GPU_ARCHS=${{ parameters.gpuArch }} .. - make -j - workingDirectory: '$(System.DefaultWorkingDirectory)' - -- task: Bash@3 - name: DeployTestEnvWithIb - displayName: Deploy Test Env (with IB build) - inputs: - targetType: filePath - filePath: test/deploy/deploy.sh - arguments: single-node-test false - workingDirectory: $(System.DefaultWorkingDirectory) - -- task: Bash@3 - name: PyTestsWithIbBuildDisableIb - displayName: Run pytests (IB build, IB tests disabled) - inputs: - targetType: inline - script: | - set -e - HOSTFILE=$(System.DefaultWorkingDirectory)/test/deploy/hostfile_ci - SSH_OPTION="StrictHostKeyChecking=no" - KeyFilePath=${SSHKEYFILE_SECUREFILEPATH} - : > azureuser@10.0.0.4 - tail -f azureuser@10.0.0.4 & - CHILD_PID=$! - parallel-ssh -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" -o . \ - -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c " \ - export PATH=/usr/local/mpi/bin:\$PATH \ - export LD_LIBRARY_PATH=/root/mscclpp/build/lib:\$LD_LIBRARY_PATH; \ - cd /root/mscclpp; \ - mpirun --allow-run-as-root -tag-output -x MSCCLPP_HOME=/root/mscclpp -x MSCCLPP_DISABLE_IB_TESTS=1 -np 8 python3 -m pytest ./python/test/test_mscclpp.py -x"' - kill $CHILD_PID - workingDirectory: '$(System.DefaultWorkingDirectory)' - -- task: AzureCLI@2 - name: StopVMSS - displayName: Deallocate VMSS - condition: always() - inputs: - azureSubscription: ${{ parameters.subscription }} - scriptType: bash - scriptLocation: inlineScript - inlineScript: | - az vmss deallocate --name ${{ parameters.vmssName }} --resource-group mscclpp \ No newline at end of file diff --git a/.azure-pipelines/templates/ut-no-ib-env.yml b/.azure-pipelines/templates/ut-no-ib-env.yml new file mode 100644 index 000000000..a62f1a77a --- /dev/null +++ b/.azure-pipelines/templates/ut-no-ib-env.yml @@ -0,0 +1,95 @@ +parameters: +- name: subscription + type: string +- name: vmssName + type: string +- name: gpuArch + type: string + +steps: +- template: deploy.yml + parameters: + subscription: ${{ parameters.subscription }} + vmssName: ${{ parameters.vmssName }} + gpuArch: ${{ parameters.gpuArch }} + cmakeArgs: '-DMSCCLPP_USE_IB=OFF' + deployArgs: 'single-node-test false' + +- template: run-remote-task.yml + parameters: + name: UnitTests + displayName: Run mscclpp unit tests + remoteScript: | + ./build/bin/unit_tests + +- template: run-remote-task.yml + parameters: + name: MpUnitTests + displayName: Run mscclpp multi-process unit tests + remoteScript: | + mpirun --allow-run-as-root -tag-output -np 2 ./build/bin/mp_unit_tests + mpirun --allow-run-as-root -tag-output -np 4 ./build/bin/mp_unit_tests + mpirun --allow-run-as-root -tag-output -np 8 ./build/bin/mp_unit_tests + +- template: run-remote-task.yml + parameters: + name: PyTests + displayName: Run pytests + remoteScript: | + mpirun --allow-run-as-root -tag-output -x MSCCLPP_HOME=/root/mscclpp -x MSCCLPP_DISABLE_IB_TESTS=1 -np 8 python3 -m pytest ./python/test/test_mscclpp.py -x + +- template: run-remote-task.yml + parameters: + name: StopContainer + displayName: Stop existing container + runRemoteArgs: '--no-docker --no-log' + remoteScript: | + sudo docker stop mscclpp-test || true + sudo docker rm mscclpp-test || true + +- task: Bash@3 + displayName: Remove generated SSH key files + inputs: + targetType: 'inline' + script: | + rm -f $(System.DefaultWorkingDirectory)/sshkey $(System.DefaultWorkingDirectory)/sshkey.pub + workingDirectory: '$(System.DefaultWorkingDirectory)' + +- task: Bash@3 + name: BuildWithIb + displayName: Rebuild with IB + inputs: + targetType: 'inline' + script: | + set -e + rm -rf build + mkdir -p build && cd build + cmake \ + -DCMAKE_BUILD_TYPE=Release \ + -DMSCCLPP_BYPASS_GPU_CHECK=ON \ + -DMSCCLPP_USE_CUDA=ON \ + -DMSCCLPP_BUILD_TESTS=ON \ + -DMSCCLPP_GPU_ARCHS=${{ parameters.gpuArch }} .. + make -j + workingDirectory: '$(System.DefaultWorkingDirectory)' + +- task: Bash@3 + name: DeployTestEnvWithIb + displayName: Deploy Test Env (with IB build) + inputs: + targetType: filePath + filePath: test/deploy/deploy.sh + arguments: single-node-test false + workingDirectory: $(System.DefaultWorkingDirectory) + +- template: run-remote-task.yml + parameters: + name: PyTestsWithIbBuildDisableIb + displayName: Run pytests (IB build, IB tests disabled) + remoteScript: | + mpirun --allow-run-as-root -tag-output -x MSCCLPP_HOME=/root/mscclpp -x MSCCLPP_DISABLE_IB_TESTS=1 -np 8 python3 -m pytest ./python/test/test_mscclpp.py -x + +- template: stop.yml + parameters: + subscription: ${{ parameters.subscription }} + vmssName: ${{ parameters.vmssName }} diff --git a/.azure-pipelines/templates/ut-npkit.yaml b/.azure-pipelines/templates/ut-npkit.yaml deleted file mode 100644 index 5c35317e6..000000000 --- a/.azure-pipelines/templates/ut-npkit.yaml +++ /dev/null @@ -1,145 +0,0 @@ -parameters: -- name: subscription - type: string -- name: vmssName - type: string -- name: sshKeySecureFile - type: string -- name: gpuArch - type: string - - -steps: -- task: DownloadSecureFile@1 - name: SshKeyFile - displayName: Download key file - inputs: - secureFile: ${{ parameters.sshKeySecureFile }} - -- task: Bash@3 - name: InstallPackages - displayName: Install Packages - inputs: - targetType: inline - script: | - sudo apt-get update -y - sudo apt-get install pssh -y - curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash - -- task: AzureCLI@2 - name: StartVMSS - displayName: Start VMSS - inputs: - azureSubscription: ${{ parameters.subscription }} - scriptType: bash - scriptLocation: inlineScript - inlineScript: | - az vmss start --name ${{ parameters.vmssName }} --resource-group mscclpp - -- task: Bash@3 - name: DeployTestEnv - displayName: Deploy Test Env - inputs: - targetType: filePath - filePath: test/deploy/deploy.sh - arguments: "single-node-test" - workingDirectory: '$(System.DefaultWorkingDirectory)' - -- task: Bash@3 - name: Build - displayName: Build - inputs: - targetType: 'inline' - script: | - set -e - HOSTFILE=$(System.DefaultWorkingDirectory)/test/deploy/hostfile_ci - SSH_OPTION="StrictHostKeyChecking=no" - KeyFilePath=${SSHKEYFILE_SECUREFILEPATH} - : > azureuser@10.0.0.4 - tail -f azureuser@10.0.0.4 & - CHILD_PID=$! - parallel-ssh -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" -o . \ - -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c " \ - set -e; \ - cd /root/mscclpp; \ - mkdir -p build && cd build; \ - cmake -DCMAKE_BUILD_TYPE=Release -DMSCCLPP_BUILD_TESTS=ON -DMSCCLPP_GPU_ARCHS=${{ parameters.gpuArch }} -DMSCCLPP_NPKIT_FLAGS=\"-DENABLE_NPKIT -DENABLE_NPKIT_EVENT_TIME_SYNC_CPU -DENABLE_NPKIT_EVENT_TIME_SYNC_GPU -DENABLE_NPKIT_EVENT_EXECUTOR_INIT_ENTRY -DENABLE_NPKIT_EVENT_EXECUTOR_INIT_EXIT -DENABLE_NPKIT_EVENT_EXECUTOR_OP_BASE_ENTRY -DENABLE_NPKIT_EVENT_EXECUTOR_OP_BASE_EXIT\" ..; \ - make -j"' - kill $CHILD_PID - workingDirectory: '$(System.DefaultWorkingDirectory)' - -- task: Bash@3 - name: MpUnitTests - displayName: Run mscclpp multi-process unit tests - inputs: - targetType: 'inline' - script: | - set -e - HOSTFILE=$(System.DefaultWorkingDirectory)/test/deploy/hostfile_ci - SSH_OPTION="StrictHostKeyChecking=no" - KeyFilePath=${SSHKEYFILE_SECUREFILEPATH} - : > azureuser@10.0.0.4 - tail -f azureuser@10.0.0.4 & - CHILD_PID=$! - parallel-ssh -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" -o . \ - -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c " \ - cd /root/mscclpp; \ - rm -rf ./npkit_dump && mkdir ./npkit_dump && rm -rf ./npkit_output && mkdir ./npkit_output; \ - export PATH=/usr/local/mpi/bin:\$PATH; \ - export MSCCLPP_NPKIT_DUMP_DIR=./npkit_dump; \ - export LD_LIBRARY_PATH=/root/mscclpp/build/lib:\$LD_LIBRARY_PATH; \ - mpirun --allow-run-as-root -tag-output -np 2 ./build/bin/mp_unit_tests --gtest_filter=\"ExecutorTest.TwoNodesAllreduce\"; \ - python3 ./tools/npkit/npkit_trace_generator.py --npkit_dump_dir=./npkit_dump --npkit_event_header_path=./include/mscclpp/npkit/npkit_event.hpp --output_dir=./npkit_output; \ - grep -q NPKIT_EVENT_EXECUTOR_INIT_ENTRY ./npkit_output/npkit_event_trace.json; \ - grep -q NPKIT_EVENT_EXECUTOR_SIGNAL_ENTRY ./npkit_output/npkit_event_trace.json; \ - grep -q NPKIT_EVENT_EXECUTOR_WAIT_ENTRY ./npkit_output/npkit_event_trace.json; \ - grep -q NPKIT_EVENT_EXECUTOR_READ_REDUCE_COPY_SEND_ENTRY ./npkit_output/npkit_event_trace.json"' - kill $CHILD_PID - workingDirectory: '$(System.DefaultWorkingDirectory)' - -- task: Bash@3 - name: PyTests - displayName: Run pytests - inputs: - targetType: 'inline' - script: | - # set -e - HOSTFILE=$(System.DefaultWorkingDirectory)/test/deploy/hostfile_ci - SSH_OPTION="StrictHostKeyChecking=no" - KeyFilePath=${SSHKEYFILE_SECUREFILEPATH} - : > azureuser@10.0.0.4 - tail -f azureuser@10.0.0.4 & - CHILD_PID=$! - parallel-ssh -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" -o . \ - -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c " \ - cd /root/mscclpp; \ - rm -rf ./npkit_dump && mkdir ./npkit_dump && rm -rf ./npkit_output && mkdir ./npkit_output; \ - export PATH=/usr/local/mpi/bin:\$PATH; \ - export MSCCLPP_NPKIT_DUMP_DIR=./npkit_dump; \ - export LD_LIBRARY_PATH=/root/mscclpp/build/lib:\$LD_LIBRARY_PATH; \ - mpirun --allow-run-as-root -tag-output -x MSCCLPP_HOME=/root/mscclpp -np 8 python3 -m pytest ./python/test/test_mscclpp.py -x -k 'test_executor[allreduce.json'; \ - python3 ./tools/npkit/npkit_trace_generator.py --npkit_dump_dir=./npkit_dump --npkit_event_header_path=./include/mscclpp/npkit/npkit_event.hpp --output_dir=./npkit_output; \ - grep -q NPKIT_EVENT_EXECUTOR_INIT_ENTRY ./npkit_output/npkit_event_trace.json; \ - grep -q NPKIT_EVENT_EXECUTOR_SIGNAL_ENTRY ./npkit_output/npkit_event_trace.json; \ - grep -q NPKIT_EVENT_EXECUTOR_WAIT_ENTRY ./npkit_output/npkit_event_trace.json; \ - grep -q NPKIT_EVENT_EXECUTOR_READ_REDUCE_COPY_SEND_ENTRY ./npkit_output/npkit_event_trace.json; \ - rm -rf ./npkit_dump && mkdir ./npkit_dump && rm -rf ./npkit_output && mkdir ./npkit_output; \ - mpirun --allow-run-as-root -tag-output -x MSCCLPP_HOME=/root/mscclpp -np 8 python3 -m pytest ./python/test/test_mscclpp.py -x -k 'test_executor[allreduce_packet.json'; \ - python3 ./tools/npkit/npkit_trace_generator.py --npkit_dump_dir=./npkit_dump --npkit_event_header_path=./include/mscclpp/npkit/npkit_event.hpp --output_dir=./npkit_output; \ - grep -q NPKIT_EVENT_EXECUTOR_INIT_ENTRY ./npkit_output/npkit_event_trace.json; \ - grep -q NPKIT_EVENT_EXECUTOR_COPY_PACKET_ENTRY ./npkit_output/npkit_event_trace.json; \ - grep -q NPKIT_EVENT_EXECUTOR_PUT_PACKET_ENTRY ./npkit_output/npkit_event_trace.json; \ - grep -q NPKIT_EVENT_EXECUTOR_REDUCE_SEND_PACKET_ENTRY ./npkit_output/npkit_event_trace.json"' - kill $CHILD_PID - workingDirectory: '$(System.DefaultWorkingDirectory)' - -- task: AzureCLI@2 - name: StopVMSS - displayName: Deallocate VMSS - condition: always() - inputs: - azureSubscription: ${{ parameters.subscription }} - scriptType: bash - scriptLocation: inlineScript - inlineScript: | - az vmss deallocate --name ${{ parameters.vmssName }} --resource-group mscclpp diff --git a/.azure-pipelines/templates/ut-npkit.yml b/.azure-pipelines/templates/ut-npkit.yml new file mode 100644 index 000000000..e53b5cf59 --- /dev/null +++ b/.azure-pipelines/templates/ut-npkit.yml @@ -0,0 +1,57 @@ +parameters: +- name: subscription + type: string +- name: vmssName + type: string +- name: gpuArch + type: string + + +steps: +- template: deploy.yml + parameters: + subscription: ${{ parameters.subscription }} + vmssName: ${{ parameters.vmssName }} + gpuArch: ${{ parameters.gpuArch }} + cmakeArgs: '-DMSCCLPP_NPKIT_FLAGS="-DENABLE_NPKIT -DENABLE_NPKIT_EVENT_TIME_SYNC_CPU -DENABLE_NPKIT_EVENT_TIME_SYNC_GPU -DENABLE_NPKIT_EVENT_EXECUTOR_INIT_ENTRY -DENABLE_NPKIT_EVENT_EXECUTOR_INIT_EXIT -DENABLE_NPKIT_EVENT_EXECUTOR_OP_BASE_ENTRY -DENABLE_NPKIT_EVENT_EXECUTOR_OP_BASE_EXIT"' + deployArgs: 'single-node-test' + +- template: run-remote-task.yml + parameters: + name: MpUnitTests + displayName: Run mscclpp multi-process unit tests + remoteScript: | + rm -rf ./npkit_dump && mkdir ./npkit_dump && rm -rf ./npkit_output && mkdir ./npkit_output + export MSCCLPP_NPKIT_DUMP_DIR=./npkit_dump + mpirun --allow-run-as-root -tag-output -np 2 ./build/bin/mp_unit_tests --filter="ExecutorTest.TwoNodesAllreduce" + python3 ./tools/npkit/npkit_trace_generator.py --npkit_dump_dir=./npkit_dump --npkit_event_header_path=./include/mscclpp/npkit/npkit_event.hpp --output_dir=./npkit_output + grep -q NPKIT_EVENT_EXECUTOR_INIT_ENTRY ./npkit_output/npkit_event_trace.json + grep -q NPKIT_EVENT_EXECUTOR_SIGNAL_ENTRY ./npkit_output/npkit_event_trace.json + grep -q NPKIT_EVENT_EXECUTOR_WAIT_ENTRY ./npkit_output/npkit_event_trace.json + grep -q NPKIT_EVENT_EXECUTOR_READ_REDUCE_COPY_SEND_ENTRY ./npkit_output/npkit_event_trace.json + +- template: run-remote-task.yml + parameters: + name: PyTests + displayName: Run pytests + remoteScript: | + rm -rf ./npkit_dump && mkdir ./npkit_dump && rm -rf ./npkit_output && mkdir ./npkit_output + export MSCCLPP_NPKIT_DUMP_DIR=./npkit_dump + mpirun --allow-run-as-root -tag-output -x MSCCLPP_HOME=/root/mscclpp -np 8 python3 -m pytest ./python/test/test_mscclpp.py -x -k 'test_executor[allreduce.json' + python3 ./tools/npkit/npkit_trace_generator.py --npkit_dump_dir=./npkit_dump --npkit_event_header_path=./include/mscclpp/npkit/npkit_event.hpp --output_dir=./npkit_output + grep -q NPKIT_EVENT_EXECUTOR_INIT_ENTRY ./npkit_output/npkit_event_trace.json + grep -q NPKIT_EVENT_EXECUTOR_SIGNAL_ENTRY ./npkit_output/npkit_event_trace.json + grep -q NPKIT_EVENT_EXECUTOR_WAIT_ENTRY ./npkit_output/npkit_event_trace.json + grep -q NPKIT_EVENT_EXECUTOR_READ_REDUCE_COPY_SEND_ENTRY ./npkit_output/npkit_event_trace.json + rm -rf ./npkit_dump && mkdir ./npkit_dump && rm -rf ./npkit_output && mkdir ./npkit_output + mpirun --allow-run-as-root -tag-output -x MSCCLPP_HOME=/root/mscclpp -np 8 python3 -m pytest ./python/test/test_mscclpp.py -x -k 'test_executor[allreduce_packet.json' + python3 ./tools/npkit/npkit_trace_generator.py --npkit_dump_dir=./npkit_dump --npkit_event_header_path=./include/mscclpp/npkit/npkit_event.hpp --output_dir=./npkit_output + grep -q NPKIT_EVENT_EXECUTOR_INIT_ENTRY ./npkit_output/npkit_event_trace.json + grep -q NPKIT_EVENT_EXECUTOR_COPY_PACKET_ENTRY ./npkit_output/npkit_event_trace.json + grep -q NPKIT_EVENT_EXECUTOR_PUT_PACKET_ENTRY ./npkit_output/npkit_event_trace.json + grep -q NPKIT_EVENT_EXECUTOR_REDUCE_SEND_PACKET_ENTRY ./npkit_output/npkit_event_trace.json + +- template: stop.yml + parameters: + subscription: ${{ parameters.subscription }} + vmssName: ${{ parameters.vmssName }} diff --git a/.azure-pipelines/templates/ut.yaml b/.azure-pipelines/templates/ut.yaml deleted file mode 100644 index 2086fd0ac..000000000 --- a/.azure-pipelines/templates/ut.yaml +++ /dev/null @@ -1,142 +0,0 @@ -parameters: -- name: subscription - type: string -- name: vmssName - type: string -- name: sshKeySecureFile - type: string -- name: platform - type: string - default: 'cuda' -- name: gpuArch - type: string - -steps: -- task: Bash@3 - name: Build - displayName: Build - inputs: - targetType: 'inline' - script: | - mkdir build && cd build - if [ "${{ parameters.platform }}" == "rocm" ]; then - CXX=/opt/rocm/bin/hipcc cmake -DCMAKE_BUILD_TYPE=Release -DMSCCLPP_BYPASS_GPU_CHECK=ON -DMSCCLPP_USE_ROCM=ON -DMSCCLPP_BUILD_TESTS=ON -DMSCCLPP_GPU_ARCHS=${{ parameters.gpuArch }} .. - else - cmake -DCMAKE_BUILD_TYPE=Release -DMSCCLPP_BYPASS_GPU_CHECK=ON -DMSCCLPP_USE_CUDA=ON -DMSCCLPP_BUILD_TESTS=ON -DMSCCLPP_GPU_ARCHS=${{ parameters.gpuArch }} .. - fi - make -j - workingDirectory: '$(System.DefaultWorkingDirectory)' - -- task: DownloadSecureFile@1 - name: SshKeyFile - displayName: Download key file - inputs: - secureFile: ${{ parameters.sshKeySecureFile }} - -- task: Bash@3 - name: InstallPackages - displayName: Install Packages - inputs: - targetType: 'inline' - script: | - sudo apt-get update -y - sudo apt-get install pssh -y - curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash - -- task: AzureCLI@2 - name: StartVMSS - displayName: Start VMSS - inputs: - azureSubscription: ${{ parameters.subscription }} - scriptType: bash - scriptLocation: inlineScript - inlineScript: | - az vmss start --name ${{ parameters.vmssName }} --resource-group mscclpp - -- task: Bash@3 - name: DeployTestEnv - displayName: Deploy Test Env - inputs: - targetType: filePath - filePath: test/deploy/deploy.sh - arguments: "single-node-test true ${{ parameters.platform }}" - workingDirectory: '$(System.DefaultWorkingDirectory)' - - -- task: Bash@3 - name: UnitTests - displayName: Run mscclpp unit tests - inputs: - targetType: inline - script: | - set -e - HOSTFILE=$(System.DefaultWorkingDirectory)/test/deploy/hostfile_ci - SSH_OPTION="StrictHostKeyChecking=no" - KeyFilePath=${SSHKEYFILE_SECUREFILEPATH} - : > azureuser@10.0.0.4 - tail -f azureuser@10.0.0.4 & - CHILD_PID=$! - parallel-ssh -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" -o . \ - -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c " \ - cd /root/mscclpp; \ - export LD_LIBRARY_PATH=/root/mscclpp/build/lib:\$LD_LIBRARY_PATH; \ - ./build/bin/unit_tests"' - kill $CHILD_PID - workingDirectory: '$(System.DefaultWorkingDirectory)' - -- task: Bash@3 - name: MpUnitTests - displayName: Run mscclpp multi-process unit tests - inputs: - targetType: 'inline' - script: | - set -e - HOSTFILE=$(System.DefaultWorkingDirectory)/test/deploy/hostfile_ci - SSH_OPTION="StrictHostKeyChecking=no" - KeyFilePath=${SSHKEYFILE_SECUREFILEPATH} - : > azureuser@10.0.0.4 - tail -f azureuser@10.0.0.4 & - CHILD_PID=$! - parallel-ssh -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" -o . \ - -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c " \ - export PATH=/usr/local/mpi/bin:\$PATH; \ - cd /root/mscclpp; \ - export LD_LIBRARY_PATH=/root/mscclpp/build/lib:\$LD_LIBRARY_PATH; \ - mpirun --allow-run-as-root -tag-output -np 2 ./build/bin/mp_unit_tests; \ - mpirun --allow-run-as-root -tag-output -np 4 ./build/bin/mp_unit_tests; \ - mpirun --allow-run-as-root -tag-output -np 8 ./build/bin/mp_unit_tests"' - kill $CHILD_PID - workingDirectory: '$(System.DefaultWorkingDirectory)' - -- task: Bash@3 - name: PyTests - displayName: Run pytests - inputs: - targetType: inline - script: | - set -e - HOSTFILE=$(System.DefaultWorkingDirectory)/test/deploy/hostfile_ci - SSH_OPTION="StrictHostKeyChecking=no" - KeyFilePath=${SSHKEYFILE_SECUREFILEPATH} - : > azureuser@10.0.0.4 - tail -f azureuser@10.0.0.4 & - CHILD_PID=$! - parallel-ssh -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" -o . \ - -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c " \ - export PATH=/usr/local/mpi/bin:\$PATH \ - export LD_LIBRARY_PATH=/root/mscclpp/build/lib:\$LD_LIBRARY_PATH; \ - cd /root/mscclpp; \ - mpirun --allow-run-as-root -tag-output -x MSCCLPP_HOME=/root/mscclpp -x GPU_MAX_HW_QUEUES=8 -np 8 python3 -m pytest ./python/test/test_mscclpp.py -x"' - kill $CHILD_PID - workingDirectory: '$(System.DefaultWorkingDirectory)' - -- task: AzureCLI@2 - name: StopVMSS - displayName: Deallocate VMSS - condition: always() - inputs: - azureSubscription: ${{ parameters.subscription }} - scriptType: bash - scriptLocation: inlineScript - inlineScript: | - az vmss deallocate --name ${{ parameters.vmssName }} --resource-group mscclpp diff --git a/.azure-pipelines/templates/ut.yml b/.azure-pipelines/templates/ut.yml new file mode 100644 index 000000000..9d17e9235 --- /dev/null +++ b/.azure-pipelines/templates/ut.yml @@ -0,0 +1,48 @@ +parameters: +- name: subscription + type: string +- name: vmssName + type: string +- name: platform + type: string + default: 'cuda' +- name: gpuArch + type: string + +steps: +- template: deploy.yml + parameters: + subscription: ${{ parameters.subscription }} + vmssName: ${{ parameters.vmssName }} + platform: ${{ parameters.platform }} + gpuArch: ${{ parameters.gpuArch }} + deployArgs: 'single-node-test true ${{ parameters.platform }}' + + +- template: run-remote-task.yml + parameters: + name: UnitTests + displayName: Run mscclpp unit tests + remoteScript: | + ./build/bin/unit_tests + +- template: run-remote-task.yml + parameters: + name: MpUnitTests + displayName: Run mscclpp multi-process unit tests + remoteScript: | + mpirun --allow-run-as-root -tag-output -np 2 ./build/bin/mp_unit_tests + mpirun --allow-run-as-root -tag-output -np 4 ./build/bin/mp_unit_tests + mpirun --allow-run-as-root -tag-output -np 8 ./build/bin/mp_unit_tests + +- template: run-remote-task.yml + parameters: + name: PyTests + displayName: Run pytests + remoteScript: | + mpirun --allow-run-as-root -tag-output -x MSCCLPP_HOME=/root/mscclpp -x GPU_MAX_HW_QUEUES=8 -np 8 python3 -m pytest ./python/test/test_mscclpp.py -x + +- template: stop.yml + parameters: + subscription: ${{ parameters.subscription }} + vmssName: ${{ parameters.vmssName }} diff --git a/.azure-pipelines/ut-rocm.yml b/.azure-pipelines/ut-rocm.yml deleted file mode 100644 index 8b0aed1a0..000000000 --- a/.azure-pipelines/ut-rocm.yml +++ /dev/null @@ -1,50 +0,0 @@ -trigger: - branches: - include: - - main - - release/* - paths: - exclude: - - .devcontainer/** - - .github/** - - apps/** - - docker/** - - docs/** - - '**/*.md' - -pr: - branches: - include: - - main - - release/* - drafts: false - paths: - exclude: - - .devcontainer/** - - .github/** - - apps/** - - docker/** - - docs/** - - '**/*.md' - -jobs: -- job: UnitTestMI300X - timeoutInMinutes: 40 - pool: - name: msccl-ci-mi300x - strategy: - matrix: - rocm6_2: - containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-dev-rocm6.2 - - container: - image: $(containerImage) - - steps: - - template: templates/ut.yaml - parameters: - subscription: mscclpp-ci-mi300x - vmssName: mscclpp-mi300x-ci - sshKeySecureFile: mscclpp.pem - platform: rocm - gpuArch: gfx942 diff --git a/.azure-pipelines/ut.yml b/.azure-pipelines/ut.yml index 4aac07e64..4e6f96b1c 100644 --- a/.azure-pipelines/ut.yml +++ b/.azure-pipelines/ut.yml @@ -37,17 +37,16 @@ jobs: cuda11: containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-dev-cuda11.8 cuda12: - containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-dev-cuda12.4 + containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-dev-cuda12.9 container: image: $(containerImage) steps: - - template: templates/ut.yaml + - template: templates/ut.yml parameters: subscription: mscclpp-ci vmssName: mscclpp-ci - sshKeySecureFile: mscclpp.pem gpuArch: '80' - job: UnitTestWithNpKitA100 @@ -59,17 +58,16 @@ jobs: cuda11: containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-dev-cuda11.8 cuda12: - containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-dev-cuda12.4 + containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-dev-cuda12.9 container: image: $(containerImage) steps: - - template: templates/ut-npkit.yaml + - template: templates/ut-npkit.yml parameters: subscription: mscclpp-ci vmssName: mscclpp-ci - sshKeySecureFile: mscclpp.pem gpuArch: '80' - job: UnitTestH100 @@ -79,17 +77,16 @@ jobs: strategy: matrix: cuda12: - containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-dev-cuda12.4 + containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-dev-cuda12.9 container: image: $(containerImage) steps: - - template: templates/ut.yaml + - template: templates/ut.yml parameters: subscription: mscclpp-ci-h100 vmssName: mscclpp-h100-ci - sshKeySecureFile: mscclpp.pem gpuArch: '90' - job: UnitTestWithNpKitH100 @@ -99,17 +96,16 @@ jobs: strategy: matrix: cuda12: - containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-dev-cuda12.4 + containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-dev-cuda12.9 container: image: $(containerImage) steps: - - template: templates/ut-npkit.yaml + - template: templates/ut-npkit.yml parameters: subscription: mscclpp-ci-h100 vmssName: mscclpp-h100-ci - sshKeySecureFile: mscclpp.pem gpuArch: '90' - job: UnitTestNoIBEnv @@ -121,15 +117,34 @@ jobs: strategy: matrix: cuda12: - containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-dev-cuda12.4 + containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-dev-cuda12.9 container: image: $(containerImage) steps: - - template: templates/ut-no-ib-env.yaml + - template: templates/ut-no-ib-env.yml parameters: subscription: mscclpp-ci-h100 vmssName: mscclpp-h100-ci - sshKeySecureFile: mscclpp.pem gpuArch: '90' + +- job: UnitTestMI300X + timeoutInMinutes: 40 + pool: + name: msccl-ci-mi300x + strategy: + matrix: + rocm6_2: + containerImage: ghcr.io/microsoft/mscclpp/mscclpp:base-dev-rocm6.2 + + container: + image: $(containerImage) + + steps: + - template: templates/ut.yml + parameters: + subscription: mscclpp-ci-mi300x + vmssName: mscclpp-mi300x-ci + platform: rocm + gpuArch: gfx942 diff --git a/.codecov.yml b/.codecov.yml new file mode 100644 index 000000000..a98f1e89e --- /dev/null +++ b/.codecov.yml @@ -0,0 +1,24 @@ +codecov: + require_ci_to_pass: yes + +coverage: + status: + project: + default: + target: 68% + threshold: 1% + patch: + default: + target: 80% + +flag_management: + default_rules: + carryforward: true + +ignore: + - "test/" + - "examples/" + - "python/" + - "tools/" + - "docs/" + - "docker/" diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index db3b488a7..fb0651415 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -40,7 +40,7 @@ jobs: fail-fast: false matrix: language: [ 'cpp', 'python' ] - version: [ 'cuda11.8', 'cuda12.8' ] + version: [ 'cuda11.8', 'cuda12.9' ] steps: - name: Checkout repository @@ -62,7 +62,7 @@ jobs: - name: Build run: | rm -rf build && mkdir build && cd build - cmake -DMSCCLPP_BYPASS_GPU_CHECK=ON -DMSCCLPP_USE_CUDA=ON .. + cmake -DMSCCLPP_BYPASS_GPU_CHECK=ON -DMSCCLPP_USE_CUDA=ON -DMSCCLPP_BUILD_TESTS=OFF .. make -j4 - name: Perform CodeQL Analysis @@ -107,7 +107,7 @@ jobs: - name: Build run: | rm -rf build && mkdir build && cd build - CXX=/opt/rocm/bin/hipcc cmake -DMSCCLPP_BYPASS_GPU_CHECK=ON -DMSCCLPP_USE_ROCM=ON .. + CXX=/opt/rocm/bin/hipcc cmake -DMSCCLPP_BYPASS_GPU_CHECK=ON -DMSCCLPP_USE_ROCM=ON -DMSCCLPP_BUILD_TESTS=OFF .. make -j4 - name: Perform CodeQL Analysis diff --git a/.github/workflows/doc-build.yaml b/.github/workflows/doc-build.yml similarity index 100% rename from .github/workflows/doc-build.yaml rename to .github/workflows/doc-build.yml diff --git a/.github/workflows/integration-test-backup.yml b/.github/workflows/integration-test-backup.yml deleted file mode 100644 index 900e8aba2..000000000 --- a/.github/workflows/integration-test-backup.yml +++ /dev/null @@ -1,69 +0,0 @@ -name: IntegrationTest - -on: workflow_dispatch - -jobs: - IntegrationTest: - runs-on: [ self-hosted, A100 ] - defaults: - run: - shell: bash - strategy: - matrix: - cuda: [ cuda11.8, cuda12.2 ] - - container: - image: "ghcr.io/microsoft/mscclpp/mscclpp:base-dev-${{ matrix.cuda }}" - options: --privileged --ipc=host --gpus=all --ulimit memlock=-1:-1 - - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Build - run: | - mkdir build && cd build - cmake -DCMAKE_BUILD_TYPE=Release .. - make -j - - - name: Lock GPU clock frequency - run: | - sudo nvidia-smi -pm 1 - for i in $(seq 0 $(( $(nvidia-smi -L | wc -l) - 1 ))); do - sudo nvidia-smi -ac $(nvidia-smi --query-gpu=clocks.max.memory,clocks.max.sm --format=csv,noheader,nounits -i $i | sed 's/\ //') -i $i - done - - - name: Run mscclpp AllGather test - run: | - set -e - mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allgather_test_perf -b 1K -e 1G -f 2 -o output.jsonl - mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allgather_test_perf -b 1K -e 1G -f 2 -k 1 -o output.jsonl - mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allgather_test_perf -b 1K -e 1G -f 2 -k 2 -o output.jsonl - mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allgather_test_perf -b 1K -e 1G -f 2 -k 3 -o output.jsonl - - - name: Run mscclpp SendRecv test - run: | - set -e - mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/sendrecv_test_perf -b 1K -e 1G -f 2 -o output.jsonl - - - name: Run mscclpp AllReduce test - run: | - set -e - mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allreduce_test_perf -b 1K -e 1G -f 2 -o output.jsonl - mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allreduce_test_perf -b 1K -e 1G -f 2 -k 1 -o output.jsonl - mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allreduce_test_perf -b 1K -e 1G -f 2 -k 2 -o output.jsonl - mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allreduce_test_perf -b 1K -e 1G -f 2 -k 3 -o output.jsonl - mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allreduce_test_perf -b 1K -e 1G -f 2 -k 4 -o output.jsonl - mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allreduce_test_perf -b 12M -e 48M -i 3145728 2 -k 5 -o output.jsonl - mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/allreduce_test_perf -b 24K -e 768K -i 24576 -k 6 -w 100 -n 100 -o output.jsonl - - - name: Run mscclpp AllToAll test - run: | - set -e - mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/alltoall_test_perf -b 1K -e 1G -f 2 -o output.jsonl - mpirun --allow-run-as-root -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/bin/mscclpp-test/alltoall_test_perf -b 1K -e 1G -f 2 -k 1 -o output.jsonl - - - name: Check collective primitives performance - run: | - set -e - python3 test/mscclpp-test/check_perf_result.py --perf-file output.jsonl --baseline-file test/deploy/perf_ndmv4.jsonl diff --git a/.github/workflows/mscclpp-lang.yml b/.github/workflows/mscclpp-lang.yml index 5947b087d..a9187e968 100644 --- a/.github/workflows/mscclpp-lang.yml +++ b/.github/workflows/mscclpp-lang.yml @@ -15,7 +15,7 @@ jobs: strategy: fail-fast: false matrix: - version: [ 'cuda11.8', 'cuda12.8' ] + version: [ 'cuda11.8', 'cuda12.9' ] steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/ut-backup.yml b/.github/workflows/ut-backup.yml deleted file mode 100644 index 8849c353e..000000000 --- a/.github/workflows/ut-backup.yml +++ /dev/null @@ -1,52 +0,0 @@ -name: UnitTest - -on: workflow_dispatch - -jobs: - UnitTest: - runs-on: [ self-hosted, A100 ] - defaults: - run: - shell: bash - timeout-minutes: 30 - strategy: - matrix: - cuda: [ cuda11.8, cuda12.2 ] - - container: - image: "ghcr.io/microsoft/mscclpp/mscclpp:base-dev-${{ matrix.cuda }}" - options: --privileged --ipc=host --gpus=all --ulimit memlock=-1:-1 - - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Build - run: | - mkdir build && cd build - cmake -DCMAKE_BUILD_TYPE=Release .. - make -j - working-directory: ${{ github.workspace }} - - - name: LockGPUClock - run: | - sudo nvidia-smi -pm 1 - for i in $(seq 0 $(( $(nvidia-smi -L | wc -l) - 1 ))); do - sudo nvidia-smi -ac $(nvidia-smi --query-gpu=clocks.max.memory,clocks.max.sm --format=csv,noheader,nounits -i $i | sed 's/\ //') -i $i - done - - - name: UnitTests - run: | - ./build/bin/unit_tests - - - name: MpUnitTests - run: | - set -e - mpirun --allow-run-as-root -tag-output -np 2 ./build/bin/mp_unit_tests - mpirun --allow-run-as-root -tag-output -np 4 ./build/bin/mp_unit_tests - mpirun --allow-run-as-root -tag-output -np 8 ./build/bin/mp_unit_tests - - - name: PyTests - run: | - set -e - mpirun --allow-run-as-root -tag-output -np 8 $(which pytest) ./python/test/test_mscclpp.py -x diff --git a/.gitignore b/.gitignore index ed3b94c41..74307e67f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ .vscode/ build/ +build_coverage/ __pycache__ .*.swp *.so diff --git a/CMakeLists.txt b/CMakeLists.txt index 9ff7b0756..9db54d157 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ # Copyright (c) Microsoft Corporation. -# Licensed under the MIT license. +# Licensed under the MIT License. cmake_minimum_required(VERSION 3.25) project(mscclpp LANGUAGES CXX) @@ -56,6 +56,7 @@ option(MSCCLPP_USE_ROCM "Use AMD/ROCm." OFF) option(MSCCLPP_USE_IB "Use InfiniBand." ON) option(MSCCLPP_BYPASS_GPU_CHECK "Bypass GPU check." OFF) option(MSCCLPP_NPKIT_FLAGS "Set NPKIT flags" OFF) +option(MSCCLPP_ENABLE_COVERAGE "Enable code coverage" OFF) option(MSCCLPP_DISABLE_NB_LEAK_WARNINGS "Disable Nanobind leak warnings" ON) set(MSCCLPP_GPU_ARCHS "" CACHE STRING "Specify GPU architectures with delimiters (comma, space, or semicolon).") @@ -99,6 +100,62 @@ else() message(FATAL_ERROR "No compatible GPU found. Set MSCCLPP_USE_CUDA or MSCCLPP_USE_ROCM to ON.") endif() endif() + +# Code coverage setup +if(MSCCLPP_ENABLE_COVERAGE) + if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug") + message(WARNING "Code coverage results with an optimized (non-Debug) build may be misleading") + endif() + + if(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang") + message(STATUS "Code coverage enabled") + + # Add coverage flags to C++ targets only (not CUDA) + add_compile_options($<$:--coverage>) + add_compile_options($<$:-O0>) + add_compile_options($<$:-g>) + add_link_options($<$:--coverage>) + + # Find lcov + find_program(LCOV_PATH lcov) + + if(NOT LCOV_PATH) + message(WARNING "lcov not found. Install lcov to generate coverage reports.") + endif() + + if(LCOV_PATH) + # Add coverage target + add_custom_target(coverage + COMMAND ${CMAKE_COMMAND} -E echo "Removing old coverage data..." + COMMAND ${LCOV_PATH} --directory . --zerocounters + + COMMAND ${CMAKE_COMMAND} -E echo "Running tests..." + COMMAND ${CMAKE_CTEST_COMMAND} --output-on-failure + + COMMAND ${CMAKE_COMMAND} -E echo "Collecting coverage data..." + COMMAND ${LCOV_PATH} --directory . --capture --output-file coverage.info + + COMMAND ${CMAKE_COMMAND} -E echo "Filtering coverage data..." + COMMAND ${LCOV_PATH} --remove coverage.info '/usr/*' '*/test/*' '*/build/*' --output-file coverage.info + + COMMAND ${CMAKE_COMMAND} -E echo "Coverage report generated in coverage.info" + + WORKING_DIRECTORY ${CMAKE_BINARY_DIR} + COMMENT "Generating code coverage report" + ) + + # Add coverage clean target + add_custom_target(coverage-clean + COMMAND ${CMAKE_COMMAND} -E remove coverage.info + COMMAND ${LCOV_PATH} --directory . --zerocounters + WORKING_DIRECTORY ${CMAKE_BINARY_DIR} + COMMENT "Cleaning coverage data" + ) + endif() + else() + message(WARNING "Code coverage is only supported with GCC or Clang compilers") + endif() +endif() if(MSCCLPP_GPU_ARCHS) string(STRIP "${MSCCLPP_GPU_ARCHS}" MSCCLPP_GPU_ARCHS) string(REPLACE " " ";" MSCCLPP_GPU_ARCHS "${MSCCLPP_GPU_ARCHS}") diff --git a/README.md b/README.md index 5366f5b52..58586a309 100644 --- a/README.md +++ b/README.md @@ -3,15 +3,16 @@ [![Latest Release](https://img.shields.io/github/release/microsoft/mscclpp.svg)](https://github.com/microsoft/mscclpp/releases/latest) [![License](https://img.shields.io/github/license/microsoft/mscclpp.svg)](LICENSE) [![CodeQL](https://github.com/microsoft/mscclpp/actions/workflows/codeql-analysis.yml/badge.svg?branch=main)](https://github.com/microsoft/mscclpp/actions/workflows/codeql-analysis.yml) -[![Docs Build](https://github.com/microsoft/mscclpp/actions/workflows/doc-build.yaml/badge.svg)](https://microsoft.github.io/mscclpp/) +[![Docs Build](https://github.com/microsoft/mscclpp/actions/workflows/doc-build.yml/badge.svg)](https://microsoft.github.io/mscclpp/) +[![codecov](https://codecov.io/gh/microsoft/mscclpp/graph/badge.svg?token=DAV9DGHAY2)](https://codecov.io/gh/microsoft/mscclpp) | Testing Pipelines | Build Status | |--------------------------|-------------------| -| Unit Tests (CUDA) | [![Build Status](https://msazure.visualstudio.com/One/_apis/build/status%2FCustom%2FMSCCLPP%2Fmscclpp-ut?branchName=main)](https://msazure.visualstudio.com/One/_build/latest?definitionId=398325&branchName=main) | -| Integration Tests (CUDA) | [![Build Status](https://msazure.visualstudio.com/One/_apis/build/status%2FCustom%2FMSCCLPP%2Fmscclpp-test?branchName=main)](https://msazure.visualstudio.com/One/_build/latest?definitionId=398479&branchName=main) | -| Unit Tests (ROCm) | [![Build Status](https://msazure.visualstudio.com/One/_apis/build/status%2FCustom%2FMSCCLPP%2Fmscclpp-ut-rocm?branchName=main)](https://msazure.visualstudio.com/One/_build/latest?definitionId=399295&branchName=main) | -| NCCL Tests | [![Build Status](https://dev.azure.com/msazure/One/_apis/build/status%2FCustom%2FMSCCLPP%2Fmscclpp-nccl?branchName=main)](https://dev.azure.com/msazure/One/_build/latest?definitionId=320665&branchName=main) | -| RCCL Tests | [![Build Status](https://dev.azure.com/msazure/One/_apis/build/status%2FCustom%2FMSCCLPP%2Fmscclpp-rccl?branchName=main)](https://dev.azure.com/msazure/One/_build/latest?definitionId=448013&branchName=main) | +| Unit Tests (CUDA) | [![Build Status](https://msazure.visualstudio.com/One/_apis/build/status%2FCustom%2FMSCCLPP%2Fmscclpp-ut?branchName=main&jobName=UnitTestH100)](https://msazure.visualstudio.com/One/_build/latest?definitionId=398325&branchName=main) | +| Unit Tests (ROCm) | [![Build Status](https://msazure.visualstudio.com/One/_apis/build/status%2FCustom%2FMSCCLPP%2Fmscclpp-ut?branchName=main&jobName=UnitTestMI300X)](https://msazure.visualstudio.com/One/_build/latest?definitionId=398325&branchName=main) | +| Integration Tests (CUDA) | [![Build Status](https://msazure.visualstudio.com/One/_apis/build/status%2FCustom%2FMSCCLPP%2Fmscclpp-test?branchName=main&jobName=Integration%20test%20H100)](https://msazure.visualstudio.com/One/_build/latest?definitionId=398479&branchName=main) | +| NCCL Tests | [![Build Status](https://msazure.visualstudio.com/One/_apis/build/status%2FCustom%2FMSCCLPP%2Fmscclpp-nccl?repoName=microsoft%2Fmscclpp&branchName=main&jobName=Run%20MSCCLPP%20over%20NCCL%20Test%20(H100))](https://msazure.visualstudio.com/One/_build/latest?definitionId=320665&repoName=microsoft%2Fmscclpp&branchName=main) | +| RCCL Tests | [![Build Status](https://msazure.visualstudio.com/One/_apis/build/status%2FCustom%2FMSCCLPP%2Fmscclpp-rccl?branchName=main&jobName=Run%20MSCCLPP%20over%20RCCL%20Test%20(MI300X))](https://msazure.visualstudio.com/One/_build/latest?definitionId=448013&branchName=main) | A GPU-driven communication stack for scalable AI applications. diff --git a/docker/base-dev-x.dockerfile b/docker/base-dev-x.dockerfile index 3aa814221..7c6c927eb 100644 --- a/docker/base-dev-x.dockerfile +++ b/docker/base-dev-x.dockerfile @@ -7,13 +7,38 @@ LABEL org.opencontainers.image.source=https://github.com/microsoft/mscclpp RUN apt-get update && \ apt-get install -y --no-install-recommends \ htop \ - lcov \ vim \ && \ apt-get autoremove -y && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* /tmp/* +# Install lcov 2.2 +RUN LCOV_VERSION="2.2" && \ + apt-get update && \ + apt-get install -y --no-install-recommends \ + cpanminus \ + gcc \ + make \ + perl \ + && \ + cpanm --notest \ + Capture::Tiny \ + DateTime \ + JSON::XS \ + Memory::Process \ + TimeDate \ + && \ + cd /tmp && \ + curl -L https://github.com/linux-test-project/lcov/releases/download/v${LCOV_VERSION}/lcov-${LCOV_VERSION}.tar.gz -o lcov.tar.gz && \ + tar xzf lcov.tar.gz && \ + cd lcov-${LCOV_VERSION} && \ + make install && \ + cd / && rm -rf /tmp/lcov* && \ + apt-get autoremove -y && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* /tmp/* + # Install CMake 3.26.4 RUN OS_ARCH=$(uname -m) && \ CMAKE_VERSION="3.26.4" && \ @@ -47,7 +72,8 @@ RUN target_type=$(echo $TARGET | sed 's/\.[0-9]*$//') && \ export CUPY_INSTALL_USE_HIP=1 && export ROCM_HOME=/opt/rocm; \ fi && \ pip install --no-cache-dir --upgrade pip && \ - pip install --no-cache-dir -r python/requirements_${target_type}.txt + pip install --no-cache-dir -r python/requirements_${target_type}.txt && \ + pip install --no-cache-dir coverage xlsxwriter # Cleanup RUN rm -rf /tmp/mscclpp diff --git a/docker/build.sh b/docker/build.sh index 63552f748..89568e197 100755 --- a/docker/build.sh +++ b/docker/build.sh @@ -4,27 +4,27 @@ set -e declare -A baseImageTable baseImageTable=( - ["cuda11.8"]="nvidia/cuda:11.8.0-devel-ubuntu20.04" - ["cuda12.1"]="nvidia/cuda:12.1.1-devel-ubuntu20.04" - ["cuda12.2"]="nvidia/cuda:12.2.2-devel-ubuntu20.04" - ["cuda12.3"]="nvidia/cuda:12.3.2-devel-ubuntu20.04" + ["cuda11.8"]="nvidia/cuda:11.8.0-devel-ubuntu22.04" ["cuda12.4"]="nvidia/cuda:12.4.1-devel-ubuntu22.04" ["cuda12.8"]="nvidia/cuda:12.8.1-devel-ubuntu22.04" - ["cuda12.9"]="nvidia/cuda:12.9.1-devel-ubuntu22.04" + ["cuda12.9"]="nvidia/cuda:12.9.1-devel-ubuntu24.04" ["cuda13.0"]="nvidia/cuda:13.0.2-devel-ubuntu24.04" ["rocm6.2"]="rocm/dev-ubuntu-22.04:6.2.2" ) declare -A extraLdPathTable extraLdPathTable=( - ["cuda12.1"]="/usr/local/cuda-12.1/compat:/usr/local/cuda-12.1/lib64" - ["cuda12.2"]="/usr/local/cuda-12.2/compat:/usr/local/cuda-12.2/lib64" - ["cuda12.3"]="/usr/local/cuda-12.3/compat:/usr/local/cuda-12.3/lib64" + ["cuda11.8"]="/usr/local/cuda-11.8/compat" + ["cuda12.4"]="/usr/local/cuda-12.4/compat" + ["cuda12.8"]="/usr/local/cuda-12.8/compat" + ["cuda12.9"]="/usr/local/cuda-12.9/compat" + ["cuda13.0"]="/usr/local/cuda-13.0/compat" ["rocm6.2"]="/opt/rocm/lib" ) declare -A ofedVersionTable ofedVersionTable=( + ["cuda11.8"]="23.07-0.5.1.2" ["cuda12.4"]="23.07-0.5.1.2" ["cuda12.8"]="24.10-1.1.4.0" ["cuda12.9"]="24.10-1.1.4.0" @@ -36,7 +36,7 @@ TARGET=${1} OS_ARCH=$(uname -m) print_usage() { - echo "Usage: $0 [cuda11.8|cuda12.1|cuda12.2|cuda12.3|cuda12.4|cuda12.8|cuda12.9|cuda13.0|rocm6.2]" + echo "Usage: $0 [cuda11.8|cuda12.4|cuda12.8|cuda12.9|cuda13.0|rocm6.2]" } if [[ ! -v "baseImageTable[${TARGET}]" ]]; then diff --git a/docs/quickstart.md b/docs/quickstart.md index ac1b7d6bb..b7a68050e 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -42,7 +42,7 @@ We provide docker images which package all prerequisites for MSCCL++. You can se ```bash # For NVIDIA platforms -$ docker run -it --privileged --net=host --ipc=host --gpus all --name mscclpp-dev ghcr.io/microsoft/mscclpp/mscclpp:base-dev-cuda12.8 bash +$ docker run -it --privileged --net=host --ipc=host --gpus all --name mscclpp-dev ghcr.io/microsoft/mscclpp/mscclpp:base-dev-cuda12.9 bash # For AMD platforms $ docker run -it --privileged --net=host --ipc=host --security-opt=seccomp=unconfined --group-add=video --name mscclpp-dev ghcr.io/microsoft/mscclpp/mscclpp:base-dev-rocm6.2 bash ``` @@ -171,7 +171,6 @@ We implement [NCCL](https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/ap For example, you can run [nccl-tests](https://github.com/NVIDIA/nccl-tests) using `libmscclpp_nccl.so` as follows, where `MSCCLPP_BUILD` is your MSCCL++ build directory. ```bash -export LD_LIBRARY_PATH=/root/mscclpp/build/lib:$LD_LIBRARY_PATH; mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=$MSCCLPP_BUILD/lib/libmscclpp_nccl.so ./build/all_reduce_perf -b 1K -e 256M -f 2 -d half -G 20 -w 10 -n 50 ``` @@ -189,13 +188,11 @@ By default, if the parameter `MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION` is not spec Example 1, Allreduce will fallback to NCCL ncclAllReduce since allreduce is in the fallback list. ```bash -export LD_LIBRARY_PATH=/root/mscclpp/build/lib:$LD_LIBRARY_PATH; mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=$MSCCLPP_BUILD/lib/libmscclpp_nccl.so -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=$NCCL_BUILD/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION="allreduce,allgather" ./build/all_reduce_perf -b 1K -e 256M -f 2 -d half -G 20 -w 10 -n 50 ``` Example 2, ReduceScatter will still use msccl++ implementation since reducescatter is not in the fallbacklist. ```bash -export LD_LIBRARY_PATH=/root/mscclpp/build/lib:$LD_LIBRARY_PATH; mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=$MSCCLPP_BUILD/lib/libmscclpp_nccl.so -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=$NCCL_BUILD/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION="broadcast" ./build/reduce_scatter_perf -b 1K -e 256M -f 2 -d half -G 20 -w 10 -n 50 ``` diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 6452ebf8f..82b799dca 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,13 +1,12 @@ # Copyright (c) Microsoft Corporation. -# Licensed under the MIT license. +# Licensed under the MIT License. -find_package(MPI) +find_package(MPI REQUIRED) set(TEST_LIBS_COMMON mscclpp ${GPU_LIBRARIES} ${NUMA_LIBRARIES} Threads::Threads) if(MSCCLPP_USE_IB) list(APPEND TEST_LIBS_COMMON ${IBVERBS_LIBRARIES}) endif() -set(TEST_LIBS_GTEST GTest::gtest_main GTest::gmock_main) set(TEST_INC_COMMON PRIVATE ${PROJECT_SOURCE_DIR}/include SYSTEM PRIVATE ${GPU_INCLUDE_DIRS}) set(TEST_INC_INTERNAL PRIVATE ${PROJECT_SOURCE_DIR}/src/core/include) @@ -17,6 +16,7 @@ if(MSCCLPP_USE_ROCM) foreach(arch ${MSCCLPP_GPU_ARCHS}) add_compile_options(--offload-arch=${arch}) endforeach() + add_compile_definitions(__HIP_PLATFORM_AMD__) endif() function(add_test_executable name sources) @@ -38,28 +38,25 @@ add_test_executable(executor_test executor_test.cc) configure_file(run_mpi_test.sh.in run_mpi_test.sh) include(CTest) -include(FetchContent) -FetchContent_Declare(googletest URL https://github.com/google/googletest/archive/refs/tags/v1.14.0.zip) -option(INSTALL_GTEST OFF) -FetchContent_MakeAvailable(googletest) -include(GoogleTest) + +# Build test framework library +add_library(test_framework STATIC framework.cc) +target_include_directories(test_framework PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${TEST_INC_COMMON}) +target_link_libraries(test_framework PUBLIC MPI::MPI_CXX) # Unit tests add_executable(unit_tests) -target_link_libraries(unit_tests ${TEST_LIBS_COMMON} ${TEST_LIBS_GTEST}) +target_link_libraries(unit_tests ${TEST_LIBS_COMMON} test_framework) target_include_directories(unit_tests ${TEST_INC_COMMON} ${TEST_INC_INTERNAL}) add_subdirectory(unit) -gtest_discover_tests(unit_tests DISCOVERY_MODE PRE_TEST) +add_test(NAME unit_tests COMMAND unit_tests) # Multi-process unit tests add_executable(mp_unit_tests) -target_link_libraries(mp_unit_tests ${TEST_LIBS_COMMON} ${TEST_LIBS_GTEST} MPI::MPI_CXX) +target_link_libraries(mp_unit_tests ${TEST_LIBS_COMMON} test_framework MPI::MPI_CXX) target_include_directories(mp_unit_tests ${TEST_INC_COMMON} ${TEST_INC_INTERNAL}) add_subdirectory(mp_unit) -gtest_discover_tests(mp_unit_tests DISCOVERY_MODE PRE_TEST) +add_test(NAME mp_unit_tests COMMAND ${CMAKE_CURRENT_BINARY_DIR}/run_mpi_test.sh mp_unit_tests 2) # mscclpp-test add_subdirectory(mscclpp-test) - -# Performance tests -add_subdirectory(perf) diff --git a/test/deploy/deploy.sh b/test/deploy/deploy.sh index b26ff1a85..1f1d0e524 100644 --- a/test/deploy/deploy.sh +++ b/test/deploy/deploy.sh @@ -1,4 +1,4 @@ -set -e +set -ex TEST_NAME=$1 IB_ENVIRONMENT="${2:-true}" diff --git a/test/deploy/run-remote.sh b/test/deploy/run-remote.sh new file mode 100755 index 000000000..b646ea92e --- /dev/null +++ b/test/deploy/run-remote.sh @@ -0,0 +1,107 @@ +#!/bin/bash +# Run a command on remote CI VMs via parallel-ssh. +# By default, runs inside the mscclpp-test docker container. +# +# Usage: +# run-remote.sh [OPTIONS] < +# +# Options: +# --no-docker Run command directly on the host, not inside docker +# --no-log Don't tail the log file in the background +# --hostfile Override hostfile path (default: test/deploy/hostfile_ci) +# --host Run command on a single host (uses parallel-ssh -H) +# --user SSH user when using --host or custom hostfile + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +HOSTFILE="${SCRIPT_DIR}/hostfile_ci" +SSH_OPTION="StrictHostKeyChecking=no" +KeyFilePath="${SSHKEYFILE_SECUREFILEPATH}" + +USE_DOCKER=true +USE_LOG=true +TARGET_HOST="" +REMOTE_USER="" + +usage() { + echo "Usage: $0 [--no-docker] [--no-log] [--hostfile ] [--host ] [--user ] < " >&2 +} + +require_value() { + local opt="$1" + local val="$2" + if [ -z "$val" ]; then + echo "Missing value for ${opt}" >&2 + exit 1 + fi +} + +while [[ "$1" == --* ]]; do + case "$1" in + --no-docker) USE_DOCKER=false; shift ;; + --no-log) USE_LOG=false; shift ;; + --hostfile) + require_value "--hostfile" "${2-}" + HOSTFILE="$2" + shift 2 + ;; + --host) + require_value "--host" "${2-}" + TARGET_HOST="$2" + shift 2 + ;; + --user) + require_value "--user" "${2-}" + REMOTE_USER="$2" + shift 2 + ;; + *) echo "Unknown option: $1" >&2; exit 1 ;; + esac +done + +if [ $# -ne 0 ] || [ -t 0 ]; then + usage + exit 1 +fi + +CMD=$(cat) +if [ -z "$CMD" ]; then + usage + exit 1 +fi +CMD_B64=$(printf '%s' "$CMD" | base64 | tr -d '\n') + +PSSH_TARGET_ARGS=() +if [ -n "$TARGET_HOST" ]; then + PSSH_TARGET_ARGS=(-H "$TARGET_HOST") +else + PSSH_TARGET_ARGS=(-h "$HOSTFILE") +fi + +PSSH_USER_ARGS=() +if [ -n "$REMOTE_USER" ]; then + PSSH_USER_ARGS=(-l "$REMOTE_USER") +fi + +PSSH_COMMON=( + -t 0 + "${PSSH_TARGET_ARGS[@]}" + "${PSSH_USER_ARGS[@]}" + -x "-i ${KeyFilePath}" + -O "$SSH_OPTION" +) + +if $USE_DOCKER; then + INNER="set -euxo pipefail;" + INNER+=" cd /root/mscclpp;" + INNER+=" export LD_LIBRARY_PATH=/root/mscclpp/build/lib:\\\$LD_LIBRARY_PATH;" + INNER+=" CMD_B64='${CMD_B64}';" + INNER+=" printf '%s' \\\"\\\$CMD_B64\\\" | base64 -d | bash -euxo pipefail" + + parallel-ssh -i "${PSSH_COMMON[@]}" \ + "sudo docker exec mscclpp-test bash -c \"${INNER}\"" +else + parallel-ssh -i "${PSSH_COMMON[@]}" \ + "set -euxo pipefail; CMD_B64='${CMD_B64}'; printf '%s' \"\$CMD_B64\" | base64 -d | bash -euxo pipefail" +fi diff --git a/test/deploy/run_tests.sh b/test/deploy/run_tests.sh index 488fa81f6..0c05a090c 100644 --- a/test/deploy/run_tests.sh +++ b/test/deploy/run_tests.sh @@ -1,6 +1,5 @@ set -e HOSTFILE=/root/mscclpp/test/deploy/hostfile_mpi -export PATH=/usr/local/mpi/bin:$PATH function run_mscclpp_test() { diff --git a/test/executor_test.cc b/test/executor_test.cc index 0e7869aba..2378e7ffd 100644 --- a/test/executor_test.cc +++ b/test/executor_test.cc @@ -93,11 +93,8 @@ double benchTime(int rank, std::shared_ptr bootstrap, std::s int main(int argc, char* argv[]) { if (argc != 5 && argc != 6) { - std::cerr << "Usage: " << argv[0] << " " - << " " - << " " - << " " - << " (optional) " << std::endl; + std::cerr << "Usage: " << argv[0] << " " + << " (optional) " << std::endl; return 1; } @@ -142,7 +139,8 @@ int main(int argc, char* argv[]) { NpKit::Shutdown(); } - std::cout << "Rank " << rank << ": " << bufferSize << " bytes " << deltaSec * 1.e6 << " us" << std::endl; + double latencyUs = deltaSec * 1.e6; + std::cout << "Rank " << rank << ": " << bufferSize << " bytes " << latencyUs << " us" << std::endl; MPI_Finalize(); return 0; } diff --git a/test/framework.cc b/test/framework.cc new file mode 100644 index 000000000..73cf1272e --- /dev/null +++ b/test/framework.cc @@ -0,0 +1,323 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +#include "framework.hpp" + +#include +#include +#include +#include +#include + +namespace mscclpp { +namespace test { + +// Global state +static int gMpiRank = 0; +static int gMpiSize = 1; +static bool gMpiInitialized = false; +static bool gCurrentTestPassed = true; +static std::string gCurrentTestFailureMessage; +static std::string gCurrentTestName; + +std::string currentTestName() { return gCurrentTestName; } + +namespace utils { + +void initializeMPI(int argc, char* argv[]) { + if (gMpiInitialized) return; + + int initialized = 0; + MPI_Initialized(&initialized); + if (!initialized) { + MPI_Init(&argc, &argv); + } + + MPI_Comm_rank(MPI_COMM_WORLD, &gMpiRank); + MPI_Comm_size(MPI_COMM_WORLD, &gMpiSize); + gMpiInitialized = true; +} + +static void finalizeMPI() { + if (!gMpiInitialized) return; + + MPI_Finalize(); + gMpiInitialized = false; +} + +bool isMainRank() { return gMpiRank == 0; } + +int getMPIRank() { return gMpiRank; } + +int getMPISize() { return gMpiSize; } + +void cleanupMPI() { finalizeMPI(); } + +void reportFailure(const char* file, int line, const std::string& message) { + gCurrentTestPassed = false; + std::ostringstream oss; + oss << file << ":" << line << ": " << message; + if (!gCurrentTestFailureMessage.empty()) { + gCurrentTestFailureMessage += "\n"; + } + gCurrentTestFailureMessage += oss.str(); + std::cerr << oss.str() << std::endl; +} + +void reportSuccess() { + gCurrentTestPassed = true; + gCurrentTestFailureMessage.clear(); +} + +// Timer implementation +Timer::Timer() : isRunning_(false) {} + +void Timer::start() { + startTime_ = std::chrono::high_resolution_clock::now(); + isRunning_ = true; +} + +void Timer::stop() { + endTime_ = std::chrono::high_resolution_clock::now(); + isRunning_ = false; +} + +double Timer::elapsedMicroseconds() const { + if (isRunning_) { + auto now = std::chrono::high_resolution_clock::now(); + return std::chrono::duration_cast(now - startTime_).count(); + } + return std::chrono::duration_cast(endTime_ - startTime_).count(); +} + +double Timer::elapsedMilliseconds() const { return elapsedMicroseconds() / 1000.0; } + +double Timer::elapsedSeconds() const { return elapsedMicroseconds() / 1000000.0; } + +void cudaCheck(cudaError_t err, const char* file, int line) { + if (err != cudaSuccess) { + std::string msg = + std::string("CUDA error at ") + file + ":" + std::to_string(line) + " - " + cudaGetErrorString(err); + throw std::runtime_error(msg); + } +} + +} // namespace utils + +// TestRegistry implementation +TestRegistry& TestRegistry::instance() { + static TestRegistry registry; + return registry; +} + +void TestRegistry::registerTest(const std::string& suiteName, const std::string& testName, TestFactory factory, + bool isPerfTest) { + tests_.push_back({suiteName, testName, std::move(factory), isPerfTest}); +} + +void TestRegistry::addEnvironment(Environment* env) { environments_.push_back(env); } + +// Returns true if the test should run given the filter string. +// Filter syntax: +// "" -> run all +// "Pattern" -> run only tests whose full name contains Pattern +// "-Pattern" -> run all tests EXCEPT those whose full name contains Pattern +static bool matchesFilter(const std::string& fullName, const std::string& filter) { + if (filter.empty()) return true; + if (filter[0] == '-') { + // Negative filter: exclude tests matching any comma-separated pattern + std::string patterns = filter.substr(1); + size_t pos = 0; + while (pos < patterns.size()) { + size_t comma = patterns.find(',', pos); + std::string pattern = (comma == std::string::npos) ? patterns.substr(pos) : patterns.substr(pos, comma - pos); + if (!pattern.empty() && fullName.find(pattern) != std::string::npos) { + return false; + } + pos = (comma == std::string::npos) ? patterns.size() : comma + 1; + } + return true; + } + // Positive filter: include only matching tests + return fullName.find(filter) != std::string::npos; +} + +int TestRegistry::runAllTests(int argc, char* argv[]) { + // Initialize MPI if not already initialized + if (!gMpiInitialized) { + utils::initializeMPI(argc, argv); + } + + // Parse command line arguments + std::string filter; + bool excludePerfTests = false; + + for (int i = 1; i < argc; ++i) { + std::string arg = argv[i]; + if (arg.find("--filter=") == 0) { + filter = arg.substr(9); // Length of "--filter=" + } else if (arg == "--filter" && i + 1 < argc) { + filter = argv[i + 1]; + ++i; + } else if (arg == "--exclude-perf-tests") { + excludePerfTests = true; + } + } + + // Set up global test environments + for (auto* env : environments_) { + try { + env->SetUp(); + } catch (const std::exception& e) { + if (gMpiRank == 0) { + std::cerr << "Failed to set up test environment: " << e.what() << std::endl; + } + return 1; + } + } + + int passed = 0; + int failed = 0; + int skipped = 0; + + // Count tests to run + int totalToRun = 0; + int skippedByFilter = 0; + for (const auto& entry : tests_) { + std::string fullName = entry.suiteName + "." + entry.testName; + if (excludePerfTests && entry.isPerfTest) { + skippedByFilter++; + continue; + } + if (!matchesFilter(fullName, filter)) { + skippedByFilter++; + continue; + } + totalToRun++; + } + + if (gMpiRank == 0) { + std::cout << "[==========] Running " << totalToRun << " tests"; + if (skippedByFilter > 0) { + std::cout << " (" << skippedByFilter << " skipped by filter)"; + } + std::cout << ".\n"; + } + + for (const auto& entry : tests_) { + std::string fullName = entry.suiteName + "." + entry.testName; + + if (excludePerfTests && entry.isPerfTest) continue; + if (!matchesFilter(fullName, filter)) continue; + + gCurrentTestPassed = true; + gCurrentTestFailureMessage.clear(); + gCurrentTestName = fullName; + + if (gMpiRank == 0) { + std::cout << "[ RUN ] " << fullName << std::endl; + } + + TestCase* testCase = nullptr; + bool testSkipped = false; + bool setUpSucceeded = false; + try { + testCase = entry.factory(); + testCase->SetUp(); + setUpSucceeded = true; + testCase->TestBody(); + } catch (const SkipException& e) { + gCurrentTestPassed = true; + testSkipped = true; + if (gMpiRank == 0) { + std::cout << "[ SKIPPED ] " << fullName << ": " << e.what() << std::endl; + } + } catch (const std::exception& e) { + gCurrentTestPassed = false; + if (gCurrentTestFailureMessage.empty()) { + gCurrentTestFailureMessage = e.what(); + } + } catch (...) { + gCurrentTestPassed = false; + if (gCurrentTestFailureMessage.empty()) { + gCurrentTestFailureMessage = "Unknown exception"; + } + } + + // Always call TearDown() if SetUp() succeeded, even if TestBody() threw + if (setUpSucceeded && testCase != nullptr) { + try { + testCase->TearDown(); + } catch (const std::exception& e) { + // If test already failed, keep original failure message + if (gCurrentTestPassed) { + gCurrentTestPassed = false; + gCurrentTestFailureMessage = std::string("TearDown() failed: ") + e.what(); + } + } catch (...) { + if (gCurrentTestPassed) { + gCurrentTestPassed = false; + gCurrentTestFailureMessage = "TearDown() failed with unknown exception"; + } + } + } + + delete testCase; + gCurrentTestName.clear(); + + if (testSkipped) { + skipped++; + continue; + } + + // Synchronize test status across all MPI processes + int localPassed = gCurrentTestPassed ? 1 : 0; + int globalPassed = 1; + if (gMpiInitialized) { + MPI_Allreduce(&localPassed, &globalPassed, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD); + } else { + globalPassed = localPassed; + } + + if (gMpiRank == 0) { + if (globalPassed) { + std::cout << "[ OK ] " << fullName << std::endl; + passed++; + } else { + std::cout << "[ FAILED ] " << fullName << std::endl; + failed++; + } + } + } + + if (gMpiRank == 0) { + std::cout << "[==========] " << totalToRun << " tests ran.\n"; + if (passed > 0) { + std::cout << "[ PASSED ] " << passed << " tests.\n"; + } + if (skipped > 0) { + std::cout << "[ SKIPPED ] " << skipped << " tests.\n"; + } + if (failed > 0) { + std::cout << "[ FAILED ] " << failed << " tests.\n"; + } + } + + // Tear down global test environments (in reverse order) + for (auto it = environments_.rbegin(); it != environments_.rend(); ++it) { + try { + (*it)->TearDown(); + delete *it; + } catch (const std::exception& e) { + if (gMpiRank == 0) { + std::cerr << "Failed to tear down test environment: " << e.what() << std::endl; + } + } + } + environments_.clear(); + + return failed > 0 ? 1 : 0; +} + +} // namespace test +} // namespace mscclpp diff --git a/test/framework.hpp b/test/framework.hpp new file mode 100644 index 000000000..26a32d5bc --- /dev/null +++ b/test/framework.hpp @@ -0,0 +1,405 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +#ifndef MSCCLPP_TEST_FRAMEWORK_HPP_ +#define MSCCLPP_TEST_FRAMEWORK_HPP_ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace mscclpp { +namespace test { + +// Test case base class +class TestCase { + public: + virtual ~TestCase() = default; + virtual void SetUp() {} + virtual void TearDown() {} + virtual void TestBody() = 0; +}; + +// Environment base class (for global test setup/teardown) +class Environment { + public: + virtual ~Environment() = default; + virtual void SetUp() {} + virtual void TearDown() {} +}; + +// Test registry and runner +class TestRegistry { + public: + using TestFactory = std::function; + + static TestRegistry& instance(); + + void registerTest(const std::string& suiteName, const std::string& testName, TestFactory factory, + bool isPerfTest = false); + void addEnvironment(Environment* env); + int runAllTests(int argc, char* argv[]); + + private: + TestRegistry() = default; + struct TestEntry { + std::string suiteName; + std::string testName; + TestFactory factory; + bool isPerfTest; + }; + std::vector tests_; + std::vector environments_; +}; + +// Returns "Suite.Name" for the currently running test, or "" if none. +std::string currentTestName(); + +// Utility functions +namespace utils { + +// MPI management +void initializeMPI(int argc, char* argv[]); +void cleanupMPI(); +bool isMainRank(); +int getMPIRank(); +int getMPISize(); + +// Timing utilities +class Timer { + public: + Timer(); + void start(); + void stop(); + double elapsedMicroseconds() const; + double elapsedMilliseconds() const; + double elapsedSeconds() const; + + private: + std::chrono::high_resolution_clock::time_point startTime_; + std::chrono::high_resolution_clock::time_point endTime_; + bool isRunning_; +}; + +// CUDA utilities +void cudaCheck(cudaError_t err, const char* file, int line); +#define CUDA_CHECK(call) mscclpp::test::utils::cudaCheck(call, __FILE__, __LINE__) + +// Test assertion helpers +void reportFailure(const char* file, int line, const std::string& message); +void reportSuccess(); + +} // namespace utils + +// Exception for test skips +class SkipException : public std::runtime_error { + public: + explicit SkipException(const std::string& message) : std::runtime_error(message) {} +}; + +// Helper class for FAIL() macro — supports message streaming via operator<< +class FailHelper { + public: + explicit FailHelper(const char* file, int line) : file_(file), line_(line) {} + template + FailHelper& operator<<(const T& value) { + message_ << value; + return *this; + } + ~FailHelper() noexcept(false) { + std::string msg = message_.str(); + if (!msg.empty()) { + ::mscclpp::test::utils::reportFailure(file_, line_, "Test failed: " + msg); + } else { + ::mscclpp::test::utils::reportFailure(file_, line_, "Test failed"); + } + throw std::runtime_error("Test failed"); + } + + private: + const char* file_; + int line_; + std::ostringstream message_; +}; + +// Helper class for SKIP_TEST() macro — supports message streaming via operator<< +// Usage: SKIP_TEST() << "Reason for skipping"; +class SkipHelper { + public: + explicit SkipHelper(const char* file, int line) : file_(file), line_(line) {} + template + SkipHelper& operator<<(const T& value) { + message_ << value; + return *this; + } + ~SkipHelper() noexcept(false) { + std::string msg = message_.str(); + if (!msg.empty()) { + throw SkipException("Test skipped: " + msg); + } else { + throw SkipException("Test skipped"); + } + } + + private: + const char* file_; + int line_; + std::ostringstream message_; +}; + +// SFINAE helper: resolves to T if T is a complete type (user-defined fixture), +// otherwise falls back to TestCase. This lets TEST() work with or without a fixture class. +namespace detail { +template +using void_t = void; + +template > +struct FixtureOf { + using type = TestCase; +}; +template +struct FixtureOf> { + using type = T; +}; +} // namespace detail + +} // namespace test +} // namespace mscclpp + +// --- Test registration macros --- +// TEST(Suite, Name): if Suite is a previously-defined class, the test inherits from it (fixture). +// Otherwise, the test inherits from TestCase (no fixture needed). + +#define TEST(test_fixture, test_name) \ + class test_fixture; \ + class test_fixture##_##test_name##_Test : public ::mscclpp::test::detail::FixtureOf::type { \ + public: \ + void TestBody() override; \ + }; \ + static bool test_fixture##_##test_name##_registered = []() { \ + ::mscclpp::test::TestRegistry::instance().registerTest( \ + #test_fixture, #test_name, \ + []() -> ::mscclpp::test::TestCase* { return new test_fixture##_##test_name##_Test(); }); \ + return true; \ + }(); \ + void test_fixture##_##test_name##_Test::TestBody() + +#define PERF_TEST(test_fixture, test_name) \ + class test_fixture; \ + class test_fixture##_##test_name##_Test : public ::mscclpp::test::detail::FixtureOf::type { \ + public: \ + void TestBody() override; \ + }; \ + static bool test_fixture##_##test_name##_registered = []() { \ + ::mscclpp::test::TestRegistry::instance().registerTest( \ + #test_fixture, #test_name, \ + []() -> ::mscclpp::test::TestCase* { return new test_fixture##_##test_name##_Test(); }, true); \ + return true; \ + }(); \ + void test_fixture##_##test_name##_Test::TestBody() + +// --- Test runner macro --- +#define RUN_ALL_TESTS() ::mscclpp::test::TestRegistry::instance().runAllTests(argc, argv) + +// Assertion macros +#define EXPECT_TRUE(condition) \ + do { \ + if (!(condition)) { \ + ::mscclpp::test::utils::reportFailure(__FILE__, __LINE__, "Expected: " #condition " to be true"); \ + } \ + } while (0) + +#define EXPECT_FALSE(condition) \ + do { \ + if (condition) { \ + ::mscclpp::test::utils::reportFailure(__FILE__, __LINE__, "Expected: " #condition " to be false"); \ + } \ + } while (0) + +#define EXPECT_EQ(val1, val2) \ + do { \ + auto v1 = (val1); \ + auto v2 = (val2); \ + if (!(v1 == v2)) { \ + std::ostringstream oss; \ + oss << "Expected: " #val1 " == " #val2 << "\n Actual: " << v1 << " vs " << v2; \ + ::mscclpp::test::utils::reportFailure(__FILE__, __LINE__, oss.str()); \ + } \ + } while (0) + +#define EXPECT_NE(val1, val2) \ + do { \ + auto v1 = (val1); \ + auto v2 = (val2); \ + if (!(v1 != v2)) { \ + std::ostringstream oss; \ + oss << "Expected: " #val1 " != " #val2 << "\n Actual: " << v1 << " vs " << v2; \ + ::mscclpp::test::utils::reportFailure(__FILE__, __LINE__, oss.str()); \ + } \ + } while (0) + +#define EXPECT_LT(val1, val2) \ + do { \ + auto v1 = (val1); \ + auto v2 = (val2); \ + if (!(v1 < v2)) { \ + std::ostringstream oss; \ + oss << "Expected: " #val1 " < " #val2 << "\n Actual: " << v1 << " vs " << v2; \ + ::mscclpp::test::utils::reportFailure(__FILE__, __LINE__, oss.str()); \ + } \ + } while (0) + +#define EXPECT_LE(val1, val2) \ + do { \ + auto v1 = (val1); \ + auto v2 = (val2); \ + if (!(v1 <= v2)) { \ + std::ostringstream oss; \ + oss << "Expected: " #val1 " <= " #val2 << "\n Actual: " << v1 << " vs " << v2; \ + ::mscclpp::test::utils::reportFailure(__FILE__, __LINE__, oss.str()); \ + } \ + } while (0) + +#define EXPECT_GT(val1, val2) \ + do { \ + auto v1 = (val1); \ + auto v2 = (val2); \ + if (!(v1 > v2)) { \ + std::ostringstream oss; \ + oss << "Expected: " #val1 " > " #val2 << "\n Actual: " << v1 << " vs " << v2; \ + ::mscclpp::test::utils::reportFailure(__FILE__, __LINE__, oss.str()); \ + } \ + } while (0) + +#define EXPECT_GE(val1, val2) \ + do { \ + auto v1 = (val1); \ + auto v2 = (val2); \ + if (!(v1 >= v2)) { \ + std::ostringstream oss; \ + oss << "Expected: " #val1 " >= " #val2 << "\n Actual: " << v1 << " vs " << v2; \ + ::mscclpp::test::utils::reportFailure(__FILE__, __LINE__, oss.str()); \ + } \ + } while (0) + +#define ASSERT_TRUE(condition) \ + do { \ + if (!(condition)) { \ + ::mscclpp::test::utils::reportFailure(__FILE__, __LINE__, "Expected: " #condition " to be true"); \ + throw std::runtime_error("Test assertion failed"); \ + } \ + } while (0) + +#define ASSERT_FALSE(condition) \ + do { \ + if (condition) { \ + ::mscclpp::test::utils::reportFailure(__FILE__, __LINE__, "Expected: " #condition " to be false"); \ + throw std::runtime_error("Test assertion failed"); \ + } \ + } while (0) + +#define ASSERT_EQ(val1, val2) \ + do { \ + auto v1 = (val1); \ + auto v2 = (val2); \ + if (!(v1 == v2)) { \ + std::ostringstream oss; \ + oss << "Expected: " #val1 " == " #val2 << "\n Actual: " << v1 << " vs " << v2; \ + ::mscclpp::test::utils::reportFailure(__FILE__, __LINE__, oss.str()); \ + throw std::runtime_error("Test assertion failed"); \ + } \ + } while (0) + +#define ASSERT_NE(val1, val2) \ + do { \ + auto v1 = (val1); \ + auto v2 = (val2); \ + if (!(v1 != v2)) { \ + std::ostringstream oss; \ + oss << "Expected: " #val1 " != " #val2 << "\n Actual: " << v1 << " vs " << v2; \ + ::mscclpp::test::utils::reportFailure(__FILE__, __LINE__, oss.str()); \ + throw std::runtime_error("Test assertion failed"); \ + } \ + } while (0) + +#define ASSERT_LT(val1, val2) \ + do { \ + auto v1 = (val1); \ + auto v2 = (val2); \ + if (!(v1 < v2)) { \ + std::ostringstream oss; \ + oss << "Expected: " #val1 " < " #val2 << "\n Actual: " << v1 << " vs " << v2; \ + ::mscclpp::test::utils::reportFailure(__FILE__, __LINE__, oss.str()); \ + throw std::runtime_error("Test assertion failed"); \ + } \ + } while (0) + +#define ASSERT_LE(val1, val2) \ + do { \ + auto v1 = (val1); \ + auto v2 = (val2); \ + if (!(v1 <= v2)) { \ + std::ostringstream oss; \ + oss << "Expected: " #val1 " <= " #val2 << "\n Actual: " << v1 << " vs " << v2; \ + ::mscclpp::test::utils::reportFailure(__FILE__, __LINE__, oss.str()); \ + throw std::runtime_error("Test assertion failed"); \ + } \ + } while (0) + +#define ASSERT_GT(val1, val2) \ + do { \ + auto v1 = (val1); \ + auto v2 = (val2); \ + if (!(v1 > v2)) { \ + std::ostringstream oss; \ + oss << "Expected: " #val1 " > " #val2 << "\n Actual: " << v1 << " vs " << v2; \ + ::mscclpp::test::utils::reportFailure(__FILE__, __LINE__, oss.str()); \ + throw std::runtime_error("Test assertion failed"); \ + } \ + } while (0) + +#define ASSERT_GE(val1, val2) \ + do { \ + auto v1 = (val1); \ + auto v2 = (val2); \ + if (!(v1 >= v2)) { \ + std::ostringstream oss; \ + oss << "Expected: " #val1 " >= " #val2 << "\n Actual: " << v1 << " vs " << v2; \ + ::mscclpp::test::utils::reportFailure(__FILE__, __LINE__, oss.str()); \ + throw std::runtime_error("Test assertion failed"); \ + } \ + } while (0) + +#define ASSERT_NO_THROW(statement) \ + do { \ + try { \ + statement; \ + } catch (const std::exception& e) { \ + std::ostringstream oss; \ + oss << "Expected: " #statement " not to throw\n Actual: threw " << e.what(); \ + ::mscclpp::test::utils::reportFailure(__FILE__, __LINE__, oss.str()); \ + throw std::runtime_error("Test assertion failed"); \ + } catch (...) { \ + ::mscclpp::test::utils::reportFailure( \ + __FILE__, __LINE__, "Expected: " #statement " not to throw\n Actual: threw unknown exception"); \ + throw std::runtime_error("Test assertion failed"); \ + } \ + } while (0) + +// --- Test control macros --- + +// Fail the current test immediately. Usage: FAIL() << "reason"; +#define FAIL() ::mscclpp::test::FailHelper(__FILE__, __LINE__) + +// Skip the current test. Usage: SKIP_TEST() << "reason"; +#define SKIP_TEST() ::mscclpp::test::SkipHelper(__FILE__, __LINE__) + +#endif // MSCCLPP_TEST_FRAMEWORK_HPP_ diff --git a/test/mp_unit/bootstrap_tests.cc b/test/mp_unit/bootstrap_tests.cc index 4bbab2f18..c28087a45 100644 --- a/test/mp_unit/bootstrap_tests.cc +++ b/test/mp_unit/bootstrap_tests.cc @@ -1,5 +1,5 @@ // Copyright (c) Microsoft Corporation. -// Licensed under the MIT license. +// Licensed under the MIT License. #include @@ -48,7 +48,7 @@ void BootstrapTest::bootstrapTestAll(std::shared_ptr bootstr bootstrapTestSendRecv(bootstrap); } -TEST_F(BootstrapTest, WithId) { +TEST(BootstrapTest, WithId) { auto bootstrap = std::make_shared(gEnv->rank, gEnv->worldSize); mscclpp::UniqueId id; if (bootstrap->getRank() == 0) id = bootstrap->createUniqueId(); @@ -57,13 +57,13 @@ TEST_F(BootstrapTest, WithId) { bootstrapTestAll(bootstrap); } -TEST_F(BootstrapTest, WithIpPortPair) { +TEST(BootstrapTest, WithIpPortPair) { auto bootstrap = std::make_shared(gEnv->rank, gEnv->worldSize); bootstrap->initialize(gEnv->args["ip_port"]); bootstrapTestAll(bootstrap); } -TEST_F(BootstrapTest, ResumeWithId) { +TEST(BootstrapTest, ResumeWithId) { // This test may take a few minutes. bootstrapTestTimer.set(300); @@ -76,19 +76,19 @@ TEST_F(BootstrapTest, ResumeWithId) { } } -TEST_F(BootstrapTest, ResumeWithIpPortPair) { +TEST(BootstrapTest, ResumeWithIpPortPair) { for (int i = 0; i < 5; ++i) { auto bootstrap = std::make_shared(gEnv->rank, gEnv->worldSize); bootstrap->initialize(gEnv->args["ip_port"]); } } -TEST_F(BootstrapTest, ExitBeforeConnect) { +TEST(BootstrapTest, ExitBeforeConnect) { auto bootstrap = std::make_shared(gEnv->rank, gEnv->worldSize); bootstrap->createUniqueId(); } -TEST_F(BootstrapTest, TimeoutWithId) { +TEST(BootstrapTest, TimeoutWithId) { mscclpp::Timer timer; // All ranks initialize a bootstrap with their own id (will hang) @@ -99,7 +99,7 @@ TEST_F(BootstrapTest, TimeoutWithId) { // Set bootstrap timeout to 1 second bootstrap->initialize(id, 1); } catch (const mscclpp::Error& e) { - ASSERT_EQ(e.getErrorCode(), mscclpp::ErrorCode::Timeout); + ASSERT_TRUE(e.getErrorCode() == mscclpp::ErrorCode::Timeout); } // Timeout should be sligtly greater than 1 second @@ -139,7 +139,7 @@ class MPIBootstrap : public mscclpp::Bootstrap { } }; -TEST_F(BootstrapTest, MPIBootstrap) { +TEST(BootstrapTest, MPIBootstrap) { auto bootstrap = std::make_shared(); bootstrapTestAll(bootstrap); } diff --git a/test/mp_unit/communicator_tests.cu b/test/mp_unit/communicator_tests.cu index 9d83532a1..066c5514c 100644 --- a/test/mp_unit/communicator_tests.cu +++ b/test/mp_unit/communicator_tests.cu @@ -1,5 +1,5 @@ // Copyright (c) Microsoft Corporation. -// Licensed under the MIT license. +// Licensed under the MIT License. #include @@ -185,7 +185,7 @@ bool CommunicatorTest::testWriteCorrectness(bool skipLocal) { return true; } -TEST_F(CommunicatorTest, BasicWrite) { +TEST(CommunicatorTest, BasicWrite) { if (gEnv->rank >= numRanksToUse) return; deviceBufferInit(); @@ -215,7 +215,7 @@ __global__ void kernelWaitSemaphores(mscclpp::Host2DeviceSemaphore::DeviceHandle } } -TEST_F(CommunicatorTest, WriteWithDeviceSemaphores) { +TEST(CommunicatorTest, WriteWithDeviceSemaphores) { if (gEnv->rank >= numRanksToUse) return; std::unordered_map> semaphores; @@ -254,7 +254,7 @@ TEST_F(CommunicatorTest, WriteWithDeviceSemaphores) { communicator->bootstrap()->barrier(); } -TEST_F(CommunicatorTest, WriteWithHostSemaphores) { +TEST(CommunicatorTest, WriteWithHostSemaphores) { if (gEnv->rank >= numRanksToUse) return; std::unordered_map> semaphores; diff --git a/test/mp_unit/executor_tests.cc b/test/mp_unit/executor_tests.cc index a903ed08d..4f3f25451 100644 --- a/test/mp_unit/executor_tests.cc +++ b/test/mp_unit/executor_tests.cc @@ -1,7 +1,8 @@ // Copyright (c) Microsoft Corporation. -// Licensed under the MIT license. +// Licensed under the MIT License. #include +#include #include #include @@ -22,7 +23,7 @@ std::string getExecutablePath() { void ExecutorTest::SetUp() { if (gEnv->worldSize != 2 || gEnv->nRanksPerNode != 2) { - GTEST_SKIP() << "This test requires world size to be 2 and ranks per node to be 2"; + SKIP_TEST() << "This test requires world size to be 2 and ranks per node to be 2"; } MultiProcessTest::SetUp(); @@ -49,7 +50,7 @@ void ExecutorTest::TearDown() { MultiProcessTest::TearDown(); } -TEST_F(ExecutorTest, TwoNodesAllreduce) { +TEST(ExecutorTest, TwoNodesAllreduce) { std::string executablePath = getExecutablePath(); std::filesystem::path path = executablePath; std::filesystem::path executionFilesPath = diff --git a/test/mp_unit/ib_tests.cu b/test/mp_unit/ib_tests.cu index 051030ac8..04ab402dd 100644 --- a/test/mp_unit/ib_tests.cu +++ b/test/mp_unit/ib_tests.cu @@ -1,5 +1,5 @@ // Copyright (c) Microsoft Corporation. -// Licensed under the MIT license. +// Licensed under the MIT License. #include @@ -18,9 +18,7 @@ void IbTestBase::SetUp() { } void IbPeerToPeerTest::SetUp() { -#if !defined(USE_IBVERBS) - GTEST_SKIP() << "This test requires IBVerbs that the current build does not support."; -#endif // !defined(USE_IBVERBS) + REQUIRE_IBVERBS; IbTestBase::SetUp(); @@ -80,7 +78,7 @@ void IbPeerToPeerTest::stageSendWriteWithImm(uint32_t size, uint64_t wrId, uint6 qp->stageSendWriteWithImm(mr.get(), remoteMrInfo, size, wrId, srcOffset, dstOffset, signaled, immData); } -TEST_F(IbPeerToPeerTest, SimpleSendRecv) { +TEST(IbPeerToPeerTest, SimpleSendRecv) { if (gEnv->rank >= 2) { // This test needs only two ranks return; @@ -195,7 +193,7 @@ __global__ void kernelMemoryConsistency(uint64_t* data, volatile uint64_t* curIt } } -TEST_F(IbPeerToPeerTest, MemoryConsistency) { +TEST(IbPeerToPeerTest, MemoryConsistency) { if (gEnv->rank >= 2) { // This test needs only two ranks return; @@ -303,7 +301,7 @@ TEST_F(IbPeerToPeerTest, MemoryConsistency) { EXPECT_EQ(res, 0); } -TEST_F(IbPeerToPeerTest, SimpleAtomicAdd) { +TEST(IbPeerToPeerTest, SimpleAtomicAdd) { if (gEnv->rank >= 2) { // This test needs only two ranks return; diff --git a/test/mp_unit/memory_channel_tests.cu b/test/mp_unit/memory_channel_tests.cu index f6ef3aedc..318d301af 100644 --- a/test/mp_unit/memory_channel_tests.cu +++ b/test/mp_unit/memory_channel_tests.cu @@ -1,5 +1,5 @@ // Copyright (c) Microsoft Corporation. -// Licensed under the MIT license. +// Licensed under the MIT License. #include @@ -8,7 +8,7 @@ void MemoryChannelOneToOneTest::SetUp() { // Need at least two ranks within a node if (gEnv->nRanksPerNode < 2) { - GTEST_SKIP(); + SKIP_TEST(); } // Use only two ranks setNumRanksToUse(2); @@ -88,27 +88,12 @@ void MemoryChannelOneToOneTest::packetPingPongTest(const std::string testName, std::shared_ptr ret = mscclpp::detail::gpuCallocHostShared(); // The least nelem is 2 for packet ping pong - kernelWrapper(buff.get(), gEnv->rank, 2, ret.get(), defaultNTries); - MSCCLPP_CUDATHROW(cudaDeviceSynchronize()); - *ret = 0; - - kernelWrapper(buff.get(), gEnv->rank, 1024, ret.get(), defaultNTries); - MSCCLPP_CUDATHROW(cudaDeviceSynchronize()); - - EXPECT_EQ(*ret, 0); - *ret = 0; - - kernelWrapper(buff.get(), gEnv->rank, 1024 * 1024, ret.get(), defaultNTries); - MSCCLPP_CUDATHROW(cudaDeviceSynchronize()); - - EXPECT_EQ(*ret, 0); - *ret = 0; - - kernelWrapper(buff.get(), gEnv->rank, 4 * 1024 * 1024, ret.get(), defaultNTries); - MSCCLPP_CUDATHROW(cudaDeviceSynchronize()); - - EXPECT_EQ(*ret, 0); - *ret = 0; + for (int nElem : {2, 1024, 1024 * 1024, 4 * 1024 * 1024}) { + *ret = 0; + kernelWrapper(buff.get(), gEnv->rank, nElem, ret.get(), defaultNTries); + MSCCLPP_CUDATHROW(cudaDeviceSynchronize()); + EXPECT_EQ(*ret, 0); + } int nTries = 1000000; communicator->bootstrap()->barrier(); @@ -169,7 +154,7 @@ __global__ void kernelMemPutPingPong(int* buff, int rank, int nElem, int* ret) { } } -TEST_F(MemoryChannelOneToOneTest, PutPingPong) { +TEST(MemoryChannelOneToOneTest, PutPingPong) { if (gEnv->rank >= numRanksToUse) return; const int nElem = 4 * 1024 * 1024; @@ -187,28 +172,12 @@ TEST_F(MemoryChannelOneToOneTest, PutPingPong) { std::shared_ptr ret = mscclpp::detail::gpuCallocHostShared(); - kernelMemPutPingPong<<<1, 1024>>>(buff.get(), gEnv->rank, 1, ret.get()); - MSCCLPP_CUDATHROW(cudaDeviceSynchronize()); - - EXPECT_EQ(*ret, 0); - *ret = 0; - - kernelMemPutPingPong<<<1, 1024>>>(buff.get(), gEnv->rank, 1024, ret.get()); - MSCCLPP_CUDATHROW(cudaDeviceSynchronize()); - - EXPECT_EQ(*ret, 0); - *ret = 0; - - kernelMemPutPingPong<<<1, 1024>>>(buff.get(), gEnv->rank, 1024 * 1024, ret.get()); - MSCCLPP_CUDATHROW(cudaDeviceSynchronize()); - - EXPECT_EQ(*ret, 0); - *ret = 0; - - kernelMemPutPingPong<<<1, 1024>>>(buff.get(), gEnv->rank, 4 * 1024 * 1024, ret.get()); - MSCCLPP_CUDATHROW(cudaDeviceSynchronize()); - - EXPECT_EQ(*ret, 0); + for (int nElem : {1, 1024, 1024 * 1024, 4 * 1024 * 1024}) { + *ret = 0; + kernelMemPutPingPong<<<1, 1024>>>(buff.get(), gEnv->rank, nElem, ret.get()); + MSCCLPP_CUDATHROW(cudaDeviceSynchronize()); + EXPECT_EQ(*ret, 0); + } } __global__ void kernelMemGetPingPong(int* buff, int rank, int nElem, int* ret) { @@ -248,7 +217,7 @@ __global__ void kernelMemGetPingPong(int* buff, int rank, int nElem, int* ret) { } } -TEST_F(MemoryChannelOneToOneTest, GetPingPong) { +TEST(MemoryChannelOneToOneTest, GetPingPong) { if (gEnv->rank >= numRanksToUse) return; const int nElem = 4 * 1024 * 1024; @@ -266,28 +235,12 @@ TEST_F(MemoryChannelOneToOneTest, GetPingPong) { std::shared_ptr ret = mscclpp::detail::gpuCallocHostShared(); - kernelMemGetPingPong<<<1, 1024>>>(buff.get(), gEnv->rank, 1, ret.get()); - MSCCLPP_CUDATHROW(cudaDeviceSynchronize()); - - EXPECT_EQ(*ret, 0); - *ret = 0; - - kernelMemGetPingPong<<<1, 1024>>>(buff.get(), gEnv->rank, 1024, ret.get()); - MSCCLPP_CUDATHROW(cudaDeviceSynchronize()); - - EXPECT_EQ(*ret, 0); - *ret = 0; - - kernelMemGetPingPong<<<1, 1024>>>(buff.get(), gEnv->rank, 1024 * 1024, ret.get()); - MSCCLPP_CUDATHROW(cudaDeviceSynchronize()); - - EXPECT_EQ(*ret, 0); - *ret = 0; - - kernelMemGetPingPong<<<1, 1024>>>(buff.get(), gEnv->rank, 4 * 1024 * 1024, ret.get()); - MSCCLPP_CUDATHROW(cudaDeviceSynchronize()); - - EXPECT_EQ(*ret, 0); + for (int nElem : {1, 1024, 1024 * 1024, 4 * 1024 * 1024}) { + *ret = 0; + kernelMemGetPingPong<<<1, 1024>>>(buff.get(), gEnv->rank, nElem, ret.get()); + MSCCLPP_CUDATHROW(cudaDeviceSynchronize()); + EXPECT_EQ(*ret, 0); + } } __global__ void kernelMemLL8PacketPingPong(int* buff, int rank, int nElem, int* ret, int nTries) { @@ -371,14 +324,14 @@ __global__ void kernelMemLL16PacketPingPong(int* buff, int rank, int nElem, int* } } -TEST_F(MemoryChannelOneToOneTest, LL8PacketPingPong) { +TEST(MemoryChannelOneToOneTest, LL8PacketPingPong) { auto kernelMemLL8PacketPingPongWrapper = [](int* buff, int rank, int nElem, int* ret, int nTries) { kernelMemLL8PacketPingPong<<<1, 1024>>>(buff, rank, nElem, ret, nTries); }; packetPingPongTest("memoryLL8PacketPingPong", kernelMemLL8PacketPingPongWrapper); } -TEST_F(MemoryChannelOneToOneTest, LL16PacketPingPong) { +TEST(MemoryChannelOneToOneTest, LL16PacketPingPong) { auto kernelMemLL16PacketPingPongWrapper = [](int* buff, int rank, int nElem, int* ret, int nTries) { kernelMemLL16PacketPingPong<<<1, 1024>>>(buff, rank, nElem, ret, nTries); }; diff --git a/test/mp_unit/mp_unit_tests.cc b/test/mp_unit/mp_unit_tests.cc index cafd9bbca..2f6dc1cab 100644 --- a/test/mp_unit/mp_unit_tests.cc +++ b/test/mp_unit/mp_unit_tests.cc @@ -98,14 +98,18 @@ static std::unordered_map parseArgs(int argc, const ch continue; } - // Unrecognized positional token: ignore to keep parser permissive for gtest/MPI extras + // Unrecognized positional token: ignore } return options; } void MultiProcessTestEnv::SetUp() { - MPI_Init(NULL, NULL); + int initialized = 0; + MPI_Initialized(&initialized); + if (!initialized) { + MPI_Init(NULL, NULL); + } MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &worldSize); // get the local number of nodes with MPI @@ -128,18 +132,17 @@ void MultiProcessTest::TearDown() { } int main(int argc, char** argv) { - ::testing::InitGoogleTest(&argc, argv); gEnv = new MultiProcessTestEnv(argc, (const char**)argv); - ::testing::AddGlobalTestEnvironment(gEnv); + ::mscclpp::test::TestRegistry::instance().addEnvironment(gEnv); return RUN_ALL_TESTS(); } -TEST_F(MultiProcessTest, Prelim) { +TEST(MultiProcessTest, Prelim) { // Test to make sure the MPI environment is set up correctly ASSERT_GE(gEnv->worldSize, 2); } -TEST_F(MultiProcessTest, HostName) { +TEST(MultiProcessTest, HostName) { const size_t maxNameLen = 1024; std::vector buffer(gEnv->worldSize * maxNameLen, '\0'); std::string hostName = mscclpp::getHostName(maxNameLen, '\0'); @@ -159,7 +162,7 @@ TEST_F(MultiProcessTest, HostName) { } } -TEST_F(MultiProcessTest, HostHash) { +TEST(MultiProcessTest, HostHash) { std::vector buffer(gEnv->worldSize, 0); uint64_t hostHash = mscclpp::getHostHash(); buffer[gEnv->rank] = hostHash; diff --git a/test/mp_unit/mp_unit_tests.hpp b/test/mp_unit/mp_unit_tests.hpp index 17046a576..03e4cbde9 100644 --- a/test/mp_unit/mp_unit_tests.hpp +++ b/test/mp_unit/mp_unit_tests.hpp @@ -4,8 +4,6 @@ #ifndef MSCCLPP_MP_UNIT_TESTS_HPP_ #define MSCCLPP_MP_UNIT_TESTS_HPP_ -#include - #include #include #include @@ -13,10 +11,18 @@ #include #include +#include "../framework.hpp" #include "ib.hpp" #include "utils_internal.hpp" -class MultiProcessTestEnv : public ::testing::Environment { +// Skip the current test if IBVerbs is not available in this build +#if defined(USE_IBVERBS) +#define REQUIRE_IBVERBS +#else +#define REQUIRE_IBVERBS SKIP_TEST() << "This test requires IBVerbs that the current build does not support." +#endif + +class MultiProcessTestEnv : public ::mscclpp::test::Environment { public: MultiProcessTestEnv(int argc, const char** argv); @@ -37,7 +43,7 @@ mscclpp::Transport ibIdToTransport(int id); int rankToLocalRank(int rank); int rankToNode(int rank); -class MultiProcessTest : public ::testing::Test { +class MultiProcessTest : public ::mscclpp::test::TestCase { protected: void TearDown() override; }; diff --git a/test/mp_unit/port_channel_tests.cu b/test/mp_unit/port_channel_tests.cu index 7cc5954a8..764c32999 100644 --- a/test/mp_unit/port_channel_tests.cu +++ b/test/mp_unit/port_channel_tests.cu @@ -1,5 +1,5 @@ // Copyright (c) Microsoft Corporation. -// Licensed under the MIT license. +// Licensed under the MIT License. #include #include @@ -178,26 +178,12 @@ void PortChannelOneToOneTest::testPingPong(PingPongTestParams params) { std::shared_ptr ret = mscclpp::detail::gpuCallocHostShared(); const int nTries = 1000; - - kernelProxyPingPong<<<1, 1024>>>(buff.get(), gEnv->rank, 1, params.waitWithPoll, nTries, ret.get()); - MSCCLPP_CUDATHROW(cudaDeviceSynchronize()); - - EXPECT_EQ(*ret, 0); - - kernelProxyPingPong<<<1, 1024>>>(buff.get(), gEnv->rank, 1024, params.waitWithPoll, nTries, ret.get()); - MSCCLPP_CUDATHROW(cudaDeviceSynchronize()); - - EXPECT_EQ(*ret, 0); - - kernelProxyPingPong<<<1, 1024>>>(buff.get(), gEnv->rank, 1024 * 1024, params.waitWithPoll, nTries, ret.get()); - MSCCLPP_CUDATHROW(cudaDeviceSynchronize()); - - EXPECT_EQ(*ret, 0); - - kernelProxyPingPong<<<1, 1024>>>(buff.get(), gEnv->rank, 4 * 1024 * 1024, params.waitWithPoll, nTries, ret.get()); - MSCCLPP_CUDATHROW(cudaDeviceSynchronize()); - - EXPECT_EQ(*ret, 0); + for (int nElem : {1, 1024, 1024 * 1024, 4 * 1024 * 1024}) { + *ret = 0; + kernelProxyPingPong<<<1, 1024>>>(buff.get(), gEnv->rank, nElem, params.waitWithPoll, nTries, ret.get()); + MSCCLPP_CUDATHROW(cudaDeviceSynchronize()); + EXPECT_EQ(*ret, 0); + } proxyService->stopProxy(); } @@ -223,8 +209,7 @@ void PortChannelOneToOneTest::testPingPongPerf(PingPongTestParams params) { std::shared_ptr ret = mscclpp::detail::gpuCallocHostShared(); - auto* testInfo = ::testing::UnitTest::GetInstance()->current_test_info(); - const std::string testName = std::string(testInfo->test_suite_name()) + "." + std::string(testInfo->name()); + const std::string testName = ::mscclpp::test::currentTestName(); const int nTries = 1000; // Warm-up @@ -247,63 +232,51 @@ void PortChannelOneToOneTest::testPingPongPerf(PingPongTestParams params) { proxyService->stopProxy(); } -TEST_F(PortChannelOneToOneTest, PingPong) { +TEST(PortChannelOneToOneTest, PingPong) { testPingPong(PingPongTestParams{ .useIPC = true, .useIB = false, .useEthernet = false, .waitWithPoll = false, .ibMode = IbMode::Default}); } -TEST_F(PortChannelOneToOneTest, PingPongIbHostMode) { -#if defined(USE_IBVERBS) +TEST(PortChannelOneToOneTest, PingPongIbHostMode) { + REQUIRE_IBVERBS; testPingPong(PingPongTestParams{ .useIPC = false, .useIB = true, .useEthernet = false, .waitWithPoll = false, .ibMode = IbMode::Host}); -#else // !defined(USE_IBVERBS) - GTEST_SKIP() << "This test requires IBVerbs that the current build does not support."; -#endif // !defined(USE_IBVERBS) } -TEST_F(PortChannelOneToOneTest, PingPongEthernet) { +TEST(PortChannelOneToOneTest, PingPongEthernet) { testPingPong(PingPongTestParams{ .useIPC = false, .useIB = false, .useEthernet = true, .waitWithPoll = false, .ibMode = IbMode::Default}); } -TEST_F(PortChannelOneToOneTest, PingPongWithPoll) { +TEST(PortChannelOneToOneTest, PingPongWithPoll) { testPingPong(PingPongTestParams{ .useIPC = true, .useIB = false, .useEthernet = false, .waitWithPoll = true, .ibMode = IbMode::Default}); } -TEST_F(PortChannelOneToOneTest, PingPongIbHostModeWithPoll) { -#if defined(USE_IBVERBS) +TEST(PortChannelOneToOneTest, PingPongIbHostModeWithPoll) { + REQUIRE_IBVERBS; testPingPong(PingPongTestParams{ .useIPC = false, .useIB = true, .useEthernet = false, .waitWithPoll = true, .ibMode = IbMode::Host}); -#else // !defined(USE_IBVERBS) - GTEST_SKIP() << "This test requires IBVerbs that the current build does not support."; -#endif // !defined(USE_IBVERBS) } -TEST_F(PortChannelOneToOneTest, PingPongPerf) { +TEST(PortChannelOneToOneTest, PingPongPerf) { testPingPongPerf(PingPongTestParams{ .useIPC = true, .useIB = false, .useEthernet = false, .waitWithPoll = false, .ibMode = IbMode::Default}); } -TEST_F(PortChannelOneToOneTest, PingPongPerfIbHostMode) { -#if defined(USE_IBVERBS) +TEST(PortChannelOneToOneTest, PingPongPerfIbHostMode) { + REQUIRE_IBVERBS; testPingPongPerf(PingPongTestParams{ .useIPC = false, .useIB = true, .useEthernet = false, .waitWithPoll = false, .ibMode = IbMode::Host}); -#else // !defined(USE_IBVERBS) - GTEST_SKIP() << "This test requires IBVerbs that the current build does not support."; -#endif // !defined(USE_IBVERBS) } -TEST_F(PortChannelOneToOneTest, PingPongPerfIbHostNoAtomicMode) { -#if defined(USE_IBVERBS) +TEST(PortChannelOneToOneTest, PingPongPerfIbHostNoAtomicMode) { + REQUIRE_IBVERBS; testPingPongPerf(PingPongTestParams{ .useIPC = false, .useIB = true, .useEthernet = false, .waitWithPoll = false, .ibMode = IbMode::HostNoAtomic}); -#else // !defined(USE_IBVERBS) - GTEST_SKIP() << "This test requires IBVerbs that the current build does not support."; -#endif // !defined(USE_IBVERBS) } -TEST_F(PortChannelOneToOneTest, PingPongPerfEthernet) { +TEST(PortChannelOneToOneTest, PingPongPerfEthernet) { testPingPongPerf(PingPongTestParams{ .useIPC = false, .useIB = false, .useEthernet = true, .waitWithPoll = false, .ibMode = IbMode::Default}); } @@ -407,34 +380,14 @@ void PortChannelOneToOneTest::testPacketPingPong(bool useIb, IbMode ibMode) { std::shared_ptr ret = mscclpp::detail::gpuCallocHostShared(); const int nTries = 1000; - // The least nelem is 2 for packet ping pong - kernelProxyLLPingPong - <<<1, 1024>>>(buff.get(), putPacketBuffer.get(), getPacketBuffer.get(), gEnv->rank, 2, nTries, ret.get()); - MSCCLPP_CUDATHROW(cudaDeviceSynchronize()); - - EXPECT_EQ(*ret, 0); - *ret = 0; - - kernelProxyLLPingPong - <<<1, 1024>>>(buff.get(), putPacketBuffer.get(), getPacketBuffer.get(), gEnv->rank, 1024, nTries, ret.get()); - MSCCLPP_CUDATHROW(cudaDeviceSynchronize()); - - EXPECT_EQ(*ret, 0); - *ret = 0; - - kernelProxyLLPingPong<<<1, 1024>>>(buff.get(), putPacketBuffer.get(), getPacketBuffer.get(), gEnv->rank, - 1024 * 1024, nTries, ret.get()); - MSCCLPP_CUDATHROW(cudaDeviceSynchronize()); - - EXPECT_EQ(*ret, 0); - *ret = 0; - - kernelProxyLLPingPong<<<1, 1024>>>(buff.get(), putPacketBuffer.get(), getPacketBuffer.get(), gEnv->rank, - 4 * 1024 * 1024, nTries, ret.get()); - MSCCLPP_CUDATHROW(cudaDeviceSynchronize()); - - EXPECT_EQ(*ret, 0); + for (int nElem : {2, 1024, 1024 * 1024, 4 * 1024 * 1024}) { + *ret = 0; + kernelProxyLLPingPong + <<<1, 1024>>>(buff.get(), putPacketBuffer.get(), getPacketBuffer.get(), gEnv->rank, nElem, nTries, ret.get()); + MSCCLPP_CUDATHROW(cudaDeviceSynchronize()); + EXPECT_EQ(*ret, 0); + } communicator->bootstrap()->barrier(); @@ -471,8 +424,7 @@ void PortChannelOneToOneTest::testPacketPingPongPerf(bool useIb, IbMode ibMode) proxyService->startProxy(); - auto* testInfo = ::testing::UnitTest::GetInstance()->current_test_info(); - const std::string testName = std::string(testInfo->test_suite_name()) + "." + std::string(testInfo->name()); + const std::string testName = ::mscclpp::test::currentTestName(); const int nTries = 1000000; // Warm-up @@ -497,47 +449,32 @@ void PortChannelOneToOneTest::testPacketPingPongPerf(bool useIb, IbMode ibMode) proxyService->stopProxy(); } -TEST_F(PortChannelOneToOneTest, PacketPingPong) { testPacketPingPong(false, IbMode::Default); } +TEST(PortChannelOneToOneTest, PacketPingPong) { testPacketPingPong(false, IbMode::Default); } -TEST_F(PortChannelOneToOneTest, PacketPingPongIbHostMode) { -#if defined(USE_IBVERBS) +TEST(PortChannelOneToOneTest, PacketPingPongIbHostMode) { + REQUIRE_IBVERBS; testPacketPingPong(true, IbMode::Host); -#else // !defined(USE_IBVERBS) - GTEST_SKIP() << "This test requires IBVerbs that the current build does not support."; -#endif // !defined(USE_IBVERBS) } -TEST_F(PortChannelOneToOneTest, PacketPingPongPerf) { testPacketPingPongPerf(false, IbMode::Default); } +TEST(PortChannelOneToOneTest, PacketPingPongPerf) { testPacketPingPongPerf(false, IbMode::Default); } -TEST_F(PortChannelOneToOneTest, PacketPingPongPerfIbHostMode) { -#if defined(USE_IBVERBS) +TEST(PortChannelOneToOneTest, PacketPingPongPerfIbHostMode) { + REQUIRE_IBVERBS; testPacketPingPongPerf(true, IbMode::Host); -#else // !defined(USE_IBVERBS) - GTEST_SKIP() << "This test requires IBVerbs that the current build does not support."; -#endif // !defined(USE_IBVERBS) } -TEST_F(PortChannelOneToOneTest, PacketPingPongPerfIbHostNoAtomicMode) { -#if defined(USE_IBVERBS) +TEST(PortChannelOneToOneTest, PacketPingPongPerfIbHostNoAtomicMode) { + REQUIRE_IBVERBS; testPacketPingPongPerf(true, IbMode::HostNoAtomic); -#else // !defined(USE_IBVERBS) - GTEST_SKIP() << "This test requires IBVerbs that the current build does not support."; -#endif // !defined(USE_IBVERBS) } -TEST_F(PortChannelOneToOneTest, PingPongIbHostNoAtomicMode) { -#if defined(USE_IBVERBS) +TEST(PortChannelOneToOneTest, PingPongIbHostNoAtomicMode) { + REQUIRE_IBVERBS; testPingPong(PingPongTestParams{ .useIPC = false, .useIB = true, .useEthernet = false, .waitWithPoll = false, .ibMode = IbMode::HostNoAtomic}); -#else // !defined(USE_IBVERBS) - GTEST_SKIP() << "This test requires IBVerbs that the current build does not support."; -#endif // !defined(USE_IBVERBS) } -TEST_F(PortChannelOneToOneTest, PacketPingPongIbHostNoAtomicMode) { -#if defined(USE_IBVERBS) +TEST(PortChannelOneToOneTest, PacketPingPongIbHostNoAtomicMode) { + REQUIRE_IBVERBS; testPacketPingPong(true, IbMode::HostNoAtomic); -#else // !defined(USE_IBVERBS) - GTEST_SKIP() << "This test requires IBVerbs that the current build does not support."; -#endif // !defined(USE_IBVERBS) } diff --git a/test/mp_unit/switch_channel_tests.cu b/test/mp_unit/switch_channel_tests.cu index a12919e36..6d913c649 100644 --- a/test/mp_unit/switch_channel_tests.cu +++ b/test/mp_unit/switch_channel_tests.cu @@ -1,5 +1,5 @@ // Copyright (c) Microsoft Corporation. -// Licensed under the MIT license. +// Licensed under the MIT License. #include #include @@ -10,10 +10,10 @@ void SwitchChannelTest::SetUp() { // Need at least two ranks within a node if (gEnv->nRanksPerNode < 2) { - GTEST_SKIP(); + SKIP_TEST(); } if (!mscclpp::isNvlsSupported()) { - GTEST_SKIP(); + SKIP_TEST(); } // Use only two ranks setNumRanksToUse(2); @@ -23,6 +23,8 @@ void SwitchChannelTest::SetUp() { void SwitchChannelTest::TearDown() { CommunicatorTestBase::TearDown(); } __constant__ mscclpp::SwitchChannelDeviceHandle gConstSwitchChan; +__constant__ mscclpp::SwitchChannelDeviceHandle gConstSwitchChan1; +__constant__ mscclpp::SwitchChannelDeviceHandle gConstSwitchChan2; __global__ void kernelSwitchReduce() { #if (CUDA_NVLS_API_AVAILABLE) && (__CUDA_ARCH__ >= 900) @@ -31,7 +33,16 @@ __global__ void kernelSwitchReduce() { #endif // (CUDA_NVLS_API_AVAILABLE) && (__CUDA_ARCH__ >= 900) } -TEST_F(SwitchChannelTest, SimpleAllReduce) { +__global__ void kernelSwitchReduceTwo() { +#if (CUDA_NVLS_API_AVAILABLE) && (__CUDA_ARCH__ >= 900) + auto val1 = gConstSwitchChan1.reduce(0); + gConstSwitchChan1.broadcast(0, val1); + auto val2 = gConstSwitchChan2.reduce(0); + gConstSwitchChan2.broadcast(0, val2); +#endif // (CUDA_NVLS_API_AVAILABLE) && (__CUDA_ARCH__ >= 900) +} + +TEST(SwitchChannelTest, SimpleAllReduce) { if (gEnv->rank >= numRanksToUse) return; std::vector ranks; @@ -66,22 +77,13 @@ TEST_F(SwitchChannelTest, SimpleAllReduce) { for (int i = 0; i < numRanksToUse; i++) { expected += i + 1.0f; } - ASSERT_EQ(result, expected) << "Expected " << expected << " but got " << result << " for rank " << gEnv->rank; -} - -__constant__ mscclpp::SwitchChannelDeviceHandle gConstSwitchChan1; -__constant__ mscclpp::SwitchChannelDeviceHandle gConstSwitchChan2; - -__global__ void kernelSwitchReduceTwo() { -#if (CUDA_NVLS_API_AVAILABLE) && (__CUDA_ARCH__ >= 900) - auto val1 = gConstSwitchChan1.reduce(0); - gConstSwitchChan1.broadcast(0, val1); - auto val2 = gConstSwitchChan2.reduce(0); - gConstSwitchChan2.broadcast(0, val2); -#endif // (CUDA_NVLS_API_AVAILABLE) && (__CUDA_ARCH__ >= 900) + if (result != expected) { + std::cerr << "Expected " << expected << " but got " << result << " for rank " << gEnv->rank << std::endl; + } + ASSERT_EQ(result, expected); } -TEST_F(SwitchChannelTest, TwoChannelsSameConnection) { +TEST(SwitchChannelTest, TwoChannelsSameConnection) { if (gEnv->rank >= numRanksToUse) return; std::vector ranks; @@ -97,12 +99,9 @@ TEST_F(SwitchChannelTest, TwoChannelsSameConnection) { MSCCLPP_CUDATHROW(cudaMemcpy(buffer1.data(), &data1, sizeof(data1), cudaMemcpyHostToDevice)); MSCCLPP_CUDATHROW(cudaMemcpy(buffer2.data(), &data2, sizeof(data2), cudaMemcpyHostToDevice)); - // Connection size must be large enough for two granularity-aligned buffers. - // The multicast granularity is typically 2MB, so we need at least 2 * 2MB. const size_t connSize = buffer1.bytes() + buffer2.bytes(); auto nvlsConnection = mscclpp::connectNvlsCollective(communicator, ranks, connSize); - // Bind two separate buffers to the same connection auto switchChannel1 = nvlsConnection->bindAllocatedMemory(CUdeviceptr(buffer1.data()), bufSize); auto switchChannel2 = nvlsConnection->bindAllocatedMemory(CUdeviceptr(buffer2.data()), bufSize); @@ -132,6 +131,6 @@ TEST_F(SwitchChannelTest, TwoChannelsSameConnection) { expected1 += (i + 1.0f) * 1.0f; expected2 += (i + 1.0f) * 10.0f; } - ASSERT_EQ(result1, expected1) << "Channel1: expected " << expected1 << " but got " << result1; - ASSERT_EQ(result2, expected2) << "Channel2: expected " << expected2 << " but got " << result2; + ASSERT_EQ(result1, expected1); + ASSERT_EQ(result2, expected2); } diff --git a/test/perf/CMakeLists.txt b/test/perf/CMakeLists.txt deleted file mode 100644 index 6a16c0345..000000000 --- a/test/perf/CMakeLists.txt +++ /dev/null @@ -1,44 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT license. - -# Find required packages -find_package(MPI REQUIRED) - -# Note: nlohmann_json::nlohmann_json target is already available from the main project - -# Set up common libraries and includes for tests -set(PERF_TEST_LIBS_COMMON mscclpp ${GPU_LIBRARIES} ${NUMA_LIBRARIES} Threads::Threads MPI::MPI_CXX) -if(MSCCLPP_USE_IB) - list(APPEND PERF_TEST_LIBS_COMMON ${IBVERBS_LIBRARIES}) -endif() - -set(PERF_TEST_INC_COMMON - PRIVATE ${PROJECT_SOURCE_DIR}/include - SYSTEM PRIVATE ${GPU_INCLUDE_DIRS}) - -# Function to add a test executable -function(add_perf_test_executable name sources) - if(MSCCLPP_USE_ROCM) - set_source_files_properties(${sources} PROPERTIES LANGUAGE CXX) - endif() - add_executable(${name} ${sources}) - target_link_libraries(${name} ${PERF_TEST_LIBS_COMMON}) - - # Link nlohmann_json - use the target from main project - target_link_libraries(${name} nlohmann_json::nlohmann_json) - - if(MSCCLPP_USE_IB) - target_compile_definitions(${name} PRIVATE USE_IBVERBS) - endif() - - target_include_directories(${name} ${PERF_TEST_INC_COMMON}) - target_compile_definitions(${name} PRIVATE MSCCLPP_USE_MPI_FOR_TESTS) - - # Set C++ standard - target_compile_features(${name} PRIVATE cxx_std_17) - - set_target_properties(${name} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin/perf") -endfunction() - -# Add FIFO test -add_perf_test_executable(fifo_test "framework.cc;fifo_test.cu") diff --git a/test/perf/fifo_test.cu b/test/perf/fifo_test.cu deleted file mode 100644 index bb77a1067..000000000 --- a/test/perf/fifo_test.cu +++ /dev/null @@ -1,298 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT license. - -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "framework.hpp" - -using namespace mscclpp::test; - -// Constants for timeout and trigger calculation -constexpr uint64_t TIMEOUT_SPINS = 1000000; -constexpr int MIN_TRIGGERS = 1000; -constexpr int MIN_WARMUP_TRIGGERS = 100; -constexpr int TRIGGERS_PER_FIFO_SIZE = 10; -constexpr int WARMUP_TRIGGERS_PER_FIFO_SIZE = 2; - -__constant__ mscclpp::FifoDeviceHandle gFifoDeviceHandle; - -__global__ void kernelFifoPush(size_t numTriggers) { - mscclpp::FifoDeviceHandle& fifo = gFifoDeviceHandle; - int tid = threadIdx.x + blockIdx.x * blockDim.x; - mscclpp::ProxyTrigger trigger; - for (size_t i = 1; i <= numTriggers; ++i) { - trigger.fst = i; - trigger.snd = tid ^ i; - fifo.push(trigger); - } -} - -__global__ void kernelFifoPushSync(size_t numTriggers) { - mscclpp::FifoDeviceHandle& fifo = gFifoDeviceHandle; - mscclpp::ProxyTrigger trigger; - int tid = threadIdx.x + blockIdx.x * blockDim.x; - for (size_t i = 1; i <= numTriggers; ++i) { - trigger.fst = i; - trigger.snd = tid ^ i; - fifo.sync(fifo.push(trigger)); - } -} - -static void setupCuda(int& cudaDevice, int& numaNode) { - utils::CUDA_CHECK(cudaGetDevice(&cudaDevice)); - numaNode = mscclpp::getDeviceNumaNode(cudaDevice); - mscclpp::numaBind(numaNode); -} - -// Helper function to consume triggers from FIFO -static bool consumeTriggers(std::unique_ptr& hostFifo, int numTriggers, int parallel) { - int totalTriggers = numTriggers * parallel; - std::unordered_map triggerCounts; - for (int i = 0; i < totalTriggers; ++i) { - mscclpp::ProxyTrigger trigger; - uint64_t spin = 0; - do { - trigger = hostFifo->poll(); - if (spin++ > TIMEOUT_SPINS) { - return false; - } - } while (trigger.fst == 0 || trigger.snd == 0); - - // Process trigger (see src/proxy.cc) - trigger.snd ^= ((uint64_t)1 << (uint64_t)63); - trigger.snd = trigger.snd ^ trigger.fst; - assert(triggerCounts[trigger.snd] + 1 == trigger.fst); - triggerCounts[trigger.snd]++; - hostFifo->pop(); - } - return true; -} - -// Helper function to run a single kernel variant and return performance metrics -std::tuple runSingleKernelVariant(void (*kernel)(size_t), - std::unique_ptr& hostFifo, - cudaStream_t stream, int numParallel) { - // Calculate triggers based on FIFO size - const int numTriggers = std::max(MIN_TRIGGERS, static_cast(hostFifo->size() * TRIGGERS_PER_FIFO_SIZE)); - const int warmupTriggers = - std::max(MIN_WARMUP_TRIGGERS, static_cast(hostFifo->size() * WARMUP_TRIGGERS_PER_FIFO_SIZE)); - - // Warmup - kernel<<>>(warmupTriggers); - utils::CUDA_CHECK(cudaGetLastError()); - - // Process warmup triggers (note: total triggers = warmupTriggers * numParallel) - if (!consumeTriggers(hostFifo, warmupTriggers, numParallel)) { - return {0.0, 0.0, 0, 0}; // Return error values - } - utils::CUDA_CHECK(cudaStreamSynchronize(stream)); - - // Benchmark - utils::Timer timer; - timer.start(); - - kernel<<>>(numTriggers); - utils::CUDA_CHECK(cudaGetLastError()); - - // Process all triggers - if (!consumeTriggers(hostFifo, numTriggers, numParallel)) { - return {0.0, 0.0, 0, 0}; - } - utils::CUDA_CHECK(cudaStreamSynchronize(stream)); - - timer.stop(); - - const int totalTriggers = numTriggers * numParallel; - double throughput = totalTriggers / timer.elapsedSeconds(); - double duration_us = timer.elapsedMicroseconds(); - - utils::CUDA_CHECK(cudaDeviceSynchronize()); - - return {throughput, duration_us, totalTriggers, warmupTriggers * numParallel}; -} - -void runFifoTestVariant(std::unique_ptr& hostFifo, cudaStream_t stream, int numParallel, - nlohmann::ordered_json& combinedMetrics) { - auto [pushThroughput, pushDuration, numTriggers, warmupTriggers] = - runSingleKernelVariant(kernelFifoPush, hostFifo, stream, numParallel); - - auto [syncThroughput, syncDuration, syncNumTriggers, syncWarmupTriggers] = - runSingleKernelVariant(kernelFifoPushSync, hostFifo, stream, numParallel); - - auto formatThroughput = [](double thru) { - return double(int(thru * 10)) / 10.0; // Round to 1 decimal place - }; - - std::string prefix = "p" + std::to_string(numParallel) + "_"; - combinedMetrics[prefix + "push_throughput"] = formatThroughput(pushThroughput); - combinedMetrics[prefix + "push_sync_throughput"] = formatThroughput(syncThroughput); - combinedMetrics[prefix + "push_duration_us"] = pushDuration; - combinedMetrics[prefix + "push_sync_duration_us"] = syncDuration; - combinedMetrics[prefix + "num_triggers"] = numTriggers; - combinedMetrics[prefix + "warmup_triggers"] = warmupTriggers; -} - -struct FifoTestConfig { - int fifoSize; - std::vector parallelismLevels; - - // Constructor with default parallelism levels - FifoTestConfig(int size, const std::vector& parallel = {1, 2, 4, 8, 16}) - : fifoSize(size), parallelismLevels(parallel) {} -}; - -void runFifoTest(const FifoTestConfig& config, [[maybe_unused]] int rank, [[maybe_unused]] int worldSize, - [[maybe_unused]] int localRank) { - if (config.fifoSize <= 0) { - throw std::invalid_argument("FIFO size must be positive"); - } - if (config.parallelismLevels.empty()) { - throw std::invalid_argument("At least one parallelism level must be specified"); - } - - int cudaDevice, numaNode; - setupCuda(cudaDevice, numaNode); - - auto hostFifo = std::make_unique(config.fifoSize); - - mscclpp::FifoDeviceHandle hostHandle = hostFifo->deviceHandle(); - utils::CUDA_CHECK(cudaMemcpyToSymbol(gFifoDeviceHandle, &hostHandle, sizeof(mscclpp::FifoDeviceHandle))); - - cudaStream_t stream; - utils::CUDA_CHECK(cudaStreamCreate(&stream)); - - // Create test name with parallelism range - std::string testName = "FifoTest_Size" + std::to_string(config.fifoSize) + "_Parallel"; - - // Add parallelism range to test name (e.g., "P1-16" or "P1-4-16-64") - if (!config.parallelismLevels.empty()) { - testName += std::to_string(config.parallelismLevels.front()); - if (config.parallelismLevels.size() > 1) { - testName += "-" + std::to_string(config.parallelismLevels.back()); - - // If parallelism levels have non-standard steps, include more detail - if (config.parallelismLevels.size() > 2 && - (config.parallelismLevels[1] != 2 * config.parallelismLevels[0] || config.parallelismLevels.size() > 3)) { - testName = "FifoTest_Size" + std::to_string(config.fifoSize) + "_ParallelCustom"; - } - } - } - - // Print test configuration - if (utils::isMainRank()) { - std::stringstream ss; - ss << "Running FIFO test with size=" << config.fifoSize << ", parallelism_levels=["; - for (size_t i = 0; i < config.parallelismLevels.size(); ++i) { - if (i > 0) ss << ","; - ss << config.parallelismLevels[i]; - } - ss << "]"; - std::cout << ss.str() << std::endl; - } - - nlohmann::ordered_json combinedMetrics; - - for (int numParallel : config.parallelismLevels) { - runFifoTestVariant(hostFifo, stream, numParallel, combinedMetrics); - } - - std::map testParams; - testParams["fifo_size"] = std::to_string(static_cast(hostFifo->size())); - - // Add parallelism levels to test parameters - std::stringstream parallelismStream; - for (size_t i = 0; i < config.parallelismLevels.size(); ++i) { - if (i > 0) parallelismStream << ","; - parallelismStream << config.parallelismLevels[i]; - } - testParams["parallelism_levels"] = parallelismStream.str(); - - utils::recordResult(testName, "fifo", combinedMetrics, testParams); - - utils::CUDA_CHECK(cudaStreamDestroy(stream)); -} - -void runAllFifoTests([[maybe_unused]] int rank, [[maybe_unused]] int worldSize, [[maybe_unused]] int localRank) { - // clang-format off - std::vector configs = { - {1, {1}}, - {128, {1, 8, 64, 128}}, - {512, {1, 8, 64, 256, 512}}, - }; - // clang-format on - - for (const auto& config : configs) { - runFifoTest(config, rank, worldSize, localRank); - } -} - -static void printUsage(char* argv0) { - std::stringstream ss; - ss << "Usage: " << argv0 << " [OPTIONS]\n" - << "\n" - << "Options:\n" - << " -o, --output-format FORMAT Output format: human or json (default: human)\n" - << " -f, --output-file FILE JSON output file path (default: report.jsonl)\n" - << " -v, --verbose Increase verbosity\n" - << " -h, --help Show this help message\n"; - std::cout << ss.str(); -} - -int main(int argc, char* argv[]) { - std::string outputFormat = "human"; - std::string outputFile = "report.jsonl"; - bool verbose = false; - - static struct option longOptions[] = {{"output-format", required_argument, 0, 'o'}, - {"output-file", required_argument, 0, 'f'}, - {"verbose", no_argument, 0, 'v'}, - {"help", no_argument, 0, 'h'}, - {0, 0, 0, 0}}; - - int c; - while ((c = getopt_long(argc, argv, "o:f:vh", longOptions, nullptr)) != -1) { - switch (c) { - case 'o': - outputFormat = optarg; - break; - case 'f': - outputFile = optarg; - break; - case 'v': - verbose = true; - break; - case 'h': - printUsage(argv[0]); - return 0; - default: - printUsage(argv[0]); - return 1; - } - } - - std::vector>> tests = { - {"AllFifoTests", "FIFO performance tests with multiple configurations", runAllFifoTests}}; - - int result = utils::runMultipleTests(argc, argv, tests); - - if (utils::isMainRank()) { - if (outputFormat == "json") { - utils::writeResultsToFile(outputFile); - } else { - utils::printResults(verbose); - } - } - - utils::cleanupMPI(); - - return result; -} diff --git a/test/perf/framework.cc b/test/perf/framework.cc deleted file mode 100644 index 85f7abd81..000000000 --- a/test/perf/framework.cc +++ /dev/null @@ -1,208 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT license. - -#include "framework.hpp" - -#include -#include -#include -#include - -namespace mscclpp { -namespace test { - -// Global state for results -static std::vector g_results; -static int g_mpi_rank = 0; -static int g_mpi_size = 1; -static bool g_mpi_initialized = false; - -namespace utils { - -// Internal MPI helper functions (not exposed in header) -void initializeMPI(int argc, char* argv[]) { - if (g_mpi_initialized) return; - - MPI_Init(&argc, &argv); - MPI_Comm_rank(MPI_COMM_WORLD, &g_mpi_rank); - MPI_Comm_size(MPI_COMM_WORLD, &g_mpi_size); - g_mpi_initialized = true; -} - -static void finalizeMPI() { - if (!g_mpi_initialized) return; - - MPI_Finalize(); - g_mpi_initialized = false; -} - -static int getMPIRank() { return g_mpi_rank; } - -static int getMPISize() { return g_mpi_size; } - -static bool isMainProcess() { return g_mpi_rank == 0; } - -// Public utility functions for test output -bool isMainRank() { return g_mpi_rank == 0; } - -void cleanupMPI() { finalizeMPI(); } - -std::string getCurrentTimestamp() { - auto now = std::chrono::system_clock::now(); - auto time_t = std::chrono::system_clock::to_time_t(now); - std::stringstream ss; - ss << std::put_time(std::gmtime(&time_t), "%Y-%m-%dT%H:%M:%S"); - return ss.str(); -} - -void recordResult(const std::string& test_name, const std::string& test_category, const nlohmann::ordered_json& metrics, - const std::map& test_params) { - TestResult result; - result.test_name = test_name; - result.test_category = test_category; - result.test_params = test_params; - result.metrics = metrics; - result.num_processes = g_mpi_size; - result.process_rank = g_mpi_rank; - result.timestamp = getCurrentTimestamp(); - - g_results.push_back(result); -} - -void writeResultsToFile(const std::string& filename) { - std::ofstream file(filename); - if (!file) { - throw std::runtime_error("Cannot open output file: " + filename); - } - - for (const auto& result : g_results) { - nlohmann::ordered_json j; - j["test_name"] = result.test_name; - j["test_category"] = result.test_category; - j["test_config"] = result.test_params; - j["metrics"] = result.metrics; - j["num_processes"] = result.num_processes; - j["process_rank"] = result.process_rank; - j["timestamp"] = result.timestamp; - - file << j.dump() << std::endl; - } -} - -void printResults(bool verbose) { - if (!isMainProcess()) return; - - std::cout << "\n=== Test Results ===" << std::endl; - - for (const auto& result : g_results) { - std::cout << "\nTest: " << result.test_name << " (" << result.test_category << ")" << std::endl; - - if (verbose && !result.test_params.empty()) { - std::cout << " Parameters:" << std::endl; - for (const auto& param : result.test_params) { - std::cout << " " << param.first << ": " << param.second << std::endl; - } - } - - std::cout << " Metrics:" << std::endl; - for (auto it = result.metrics.begin(); it != result.metrics.end(); ++it) { - std::cout << " " << it.key() << ": " << it.value() << std::endl; - } - } - std::cout << std::endl; -} - -// Timer implementation -Timer::Timer() : is_running_(false) {} - -void Timer::start() { - start_time_ = std::chrono::high_resolution_clock::now(); - is_running_ = true; -} - -void Timer::stop() { - end_time_ = std::chrono::high_resolution_clock::now(); - is_running_ = false; -} - -double Timer::elapsedMicroseconds() const { - if (is_running_) { - auto now = std::chrono::high_resolution_clock::now(); - return std::chrono::duration_cast(now - start_time_).count(); - } - return std::chrono::duration_cast(end_time_ - start_time_).count(); -} - -double Timer::elapsedMilliseconds() const { return elapsedMicroseconds() / 1000.0; } - -double Timer::elapsedSeconds() const { return elapsedMicroseconds() / 1000000.0; } - -void cudaCheck(cudaError_t err, const char* file, int line) { - if (err != cudaSuccess) { - std::string msg = - std::string("CUDA error at ") + file + ":" + std::to_string(line) + " - " + cudaGetErrorString(err); - throw std::runtime_error(msg); - } -} - -int runMultipleTests( - int argc, char* argv[], - const std::vector>>& tests) { - int totalResult = 0; - - // Initialize MPI once for all tests - initializeMPI(argc, argv); - - try { - // Get MPI information - int rank = getMPIRank(); - int size = getMPISize(); - int local_rank = rank; // For simplicity, assume local_rank = rank - - for (const auto& test : tests) { - const std::string& testName = std::get<0>(test); - const std::string& testDescription = std::get<1>(test); - const std::function& testFunction = std::get<2>(test); - - if (rank == 0) { - std::cout << "Running test: " << testName << std::endl; - if (!testDescription.empty()) { - std::cout << " " << testDescription << std::endl; - } - } - - // Don't clear results - accumulate them for all tests in the same file - // g_results.clear(); // Commented out to accumulate results - - try { - // Run the individual test function with MPI information - testFunction(rank, size, local_rank); - - // Synchronize before moving to next test - MPI_Barrier(MPI_COMM_WORLD); - - } catch (const std::exception& e) { - if (rank == 0) { - std::cerr << "Error in test " << testName << ": " << e.what() << std::endl; - } - totalResult = 1; - } - } - - // Don't cleanup MPI here - let the caller handle it - // finalizeMPI(); - - } catch (const std::exception& e) { - if (g_mpi_rank == 0) { - std::cerr << "Error: " << e.what() << std::endl; - } - finalizeMPI(); - return 1; - } - - return totalResult; -} - -} // namespace utils -} // namespace test -} // namespace mscclpp diff --git a/test/perf/framework.hpp b/test/perf/framework.hpp deleted file mode 100644 index e9b8c31f5..000000000 --- a/test/perf/framework.hpp +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT license. - -#ifndef MSCCLPP_TEST_PERF_FRAMEWORK_HPP_ -#define MSCCLPP_TEST_PERF_FRAMEWORK_HPP_ - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace mscclpp { -namespace test { - -// Test result structure -struct TestResult { - std::string test_name; - std::string test_category; - std::map test_params; - nlohmann::ordered_json metrics; - int num_processes; - int process_rank; - std::string timestamp; -}; - -// Simple utility functions for testing -namespace utils { - -// Test execution utilities -int runMultipleTests( - int argc, char* argv[], - const std::vector>>& tests); - -// MPI management -void initializeMPI(int argc, char* argv[]); -void cleanupMPI(); -bool isMainRank(); - -// Result recording -void recordResult(const std::string& test_name, const std::string& test_category, const nlohmann::ordered_json& metrics, - const std::map& test_params = {}); - -// Output utilities -void writeResultsToFile(const std::string& filename); -void printResults(bool verbose = false); -void cleanupMPI(); - -// Timing utilities -class Timer { - public: - Timer(); - void start(); - void stop(); - double elapsedMicroseconds() const; - double elapsedMilliseconds() const; - double elapsedSeconds() const; - - private: - std::chrono::high_resolution_clock::time_point start_time_; - std::chrono::high_resolution_clock::time_point end_time_; - bool is_running_; -}; - -// CUDA utilities -void cudaCheck(cudaError_t err, const char* file, int line); -#define CUDA_CHECK(call) cudaCheck(call, __FILE__, __LINE__) - -} // namespace utils - -} // namespace test -} // namespace mscclpp - -#endif // MSCCLPP_TEST_PERF_FRAMEWORK_HPP_ diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt index 312d31ef5..7836e0632 100644 --- a/test/unit/CMakeLists.txt +++ b/test/unit/CMakeLists.txt @@ -1,11 +1,13 @@ # Copyright (c) Microsoft Corporation. -# Licensed under the MIT license. +# Licensed under the MIT License. target_sources(unit_tests PRIVATE + unit_tests_main.cc core_tests.cc gpu_utils_tests.cc errors_tests.cc fifo_tests.cu + fifo_perf_tests.cu numa_tests.cc socket_tests.cc utils_tests.cc diff --git a/test/unit/compile_tests.cu b/test/unit/compile_tests.cu index 9db91a4f4..893bb9403 100644 --- a/test/unit/compile_tests.cu +++ b/test/unit/compile_tests.cu @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. -// Licensed under the MIT license. +// Licensed under the MIT License. -#include +#include "../framework.hpp" #undef NDEBUG #ifndef DEBUG_BUILD diff --git a/test/unit/core_tests.cc b/test/unit/core_tests.cc index 32e6a1b57..d2552ff31 100644 --- a/test/unit/core_tests.cc +++ b/test/unit/core_tests.cc @@ -1,12 +1,14 @@ // Copyright (c) Microsoft Corporation. -// Licensed under the MIT license. - -#include -#include +// Licensed under the MIT License. #include -class LocalCommunicatorTest : public ::testing::Test { +#include "../framework.hpp" + +// TODO: TransportFlags needs operator<< for EXPECT_EQ to work +// Using ASSERT_TRUE with manual comparisons as workaround + +class LocalCommunicatorTest : public ::mscclpp::test::TestCase { protected: void SetUp() override { bootstrap = std::make_shared(0, 1); @@ -18,15 +20,15 @@ class LocalCommunicatorTest : public ::testing::Test { std::shared_ptr comm; }; -TEST_F(LocalCommunicatorTest, RegisterMemory) { +TEST(LocalCommunicatorTest, RegisterMemory) { int dummy[42]; auto memory = comm->registerMemory(&dummy, sizeof(dummy), mscclpp::NoTransports); EXPECT_EQ(memory.data(), &dummy); EXPECT_EQ(memory.size(), sizeof(dummy)); - EXPECT_EQ(memory.transports(), mscclpp::NoTransports); + ASSERT_TRUE(memory.transports() == mscclpp::NoTransports); } -TEST_F(LocalCommunicatorTest, SendMemoryToSelf) { +TEST(LocalCommunicatorTest, SendMemoryToSelf) { int dummy[42]; auto memory = comm->registerMemory(&dummy, sizeof(dummy), mscclpp::NoTransports); comm->sendMemory(memory, 0); @@ -34,5 +36,5 @@ TEST_F(LocalCommunicatorTest, SendMemoryToSelf) { auto sameMemory = memoryFuture.get(); EXPECT_EQ(sameMemory.data(), memory.data()); EXPECT_EQ(sameMemory.size(), memory.size()); - EXPECT_EQ(sameMemory.transports(), memory.transports()); + ASSERT_TRUE(sameMemory.transports() == memory.transports()); } diff --git a/test/unit/errors_tests.cc b/test/unit/errors_tests.cc index f9faad199..3eeed3875 100644 --- a/test/unit/errors_tests.cc +++ b/test/unit/errors_tests.cc @@ -1,30 +1,33 @@ // Copyright (c) Microsoft Corporation. -// Licensed under the MIT license. - -#include +// Licensed under the MIT License. #include +#include "../framework.hpp" + +// TODO: ErrorCode needs operator<< for EXPECT_EQ to work +// Using ASSERT_TRUE with manual comparisons as workaround + TEST(ErrorsTest, SystemError) { mscclpp::Error error("test", mscclpp::ErrorCode::SystemError); - EXPECT_EQ(error.getErrorCode(), mscclpp::ErrorCode::SystemError); + ASSERT_TRUE(error.getErrorCode() == mscclpp::ErrorCode::SystemError); EXPECT_EQ(error.what(), std::string("test (mscclpp failure: SystemError)")); } TEST(ErrorsTest, InternalError) { mscclpp::Error error("test", mscclpp::ErrorCode::InternalError); - EXPECT_EQ(error.getErrorCode(), mscclpp::ErrorCode::InternalError); + ASSERT_TRUE(error.getErrorCode() == mscclpp::ErrorCode::InternalError); EXPECT_EQ(error.what(), std::string("test (mscclpp failure: InternalError)")); } TEST(ErrorsTest, InvalidUsage) { mscclpp::Error error("test", mscclpp::ErrorCode::InvalidUsage); - EXPECT_EQ(error.getErrorCode(), mscclpp::ErrorCode::InvalidUsage); + ASSERT_TRUE(error.getErrorCode() == mscclpp::ErrorCode::InvalidUsage); EXPECT_EQ(error.what(), std::string("test (mscclpp failure: InvalidUsage)")); } TEST(ErrorsTest, Timeout) { mscclpp::Error error("test", mscclpp::ErrorCode::Timeout); - EXPECT_EQ(error.getErrorCode(), mscclpp::ErrorCode::Timeout); + ASSERT_TRUE(error.getErrorCode() == mscclpp::ErrorCode::Timeout); EXPECT_EQ(error.what(), std::string("test (mscclpp failure: Timeout)")); } diff --git a/test/unit/fifo_perf_tests.cu b/test/unit/fifo_perf_tests.cu new file mode 100644 index 000000000..34b5d6bc6 --- /dev/null +++ b/test/unit/fifo_perf_tests.cu @@ -0,0 +1,85 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +#include +#include +#include +#include +#include +#include + +#include "../framework.hpp" + +// Simple FIFO performance test to be run as part of unit_tests +// This is a performance test that can be excluded from coverage runs +// using the --exclude-perf-tests flag. + +constexpr uint64_t TIMEOUT_SPINS = 1000000; +constexpr int MIN_TRIGGERS = 100; // Reduced for faster unit test execution + +__constant__ mscclpp::FifoDeviceHandle gFifoPerfDeviceHandle; + +__global__ void kernelFifoPerfPush(size_t numTriggers) { + mscclpp::FifoDeviceHandle& fifo = gFifoPerfDeviceHandle; + int tid = threadIdx.x + blockIdx.x * blockDim.x; + mscclpp::ProxyTrigger trigger; + for (size_t i = 1; i <= numTriggers; ++i) { + trigger.fst = i; + trigger.snd = tid ^ i; + fifo.push(trigger); + } +} + +static bool consumePerfTriggers(std::unique_ptr& hostFifo, int numTriggers, int parallel) { + int totalTriggers = numTriggers * parallel; + std::unordered_map triggerCounts; + for (int i = 0; i < totalTriggers; ++i) { + mscclpp::ProxyTrigger trigger; + uint64_t spin = 0; + do { + trigger = hostFifo->poll(); + if (spin++ > TIMEOUT_SPINS) { + return false; + } + } while (trigger.fst == 0 || trigger.snd == 0); + + trigger.snd ^= ((uint64_t)1 << (uint64_t)63); + trigger.snd = trigger.snd ^ trigger.fst; + if (triggerCounts[trigger.snd] + 1 != trigger.fst) { + return false; // Validation failed + } + triggerCounts[trigger.snd]++; + hostFifo->pop(); + } + return true; +} + +PERF_TEST(FifoPerfTest, BasicPerformance) { + int cudaDevice, numaNode; + CUDA_CHECK(cudaGetDevice(&cudaDevice)); + numaNode = mscclpp::getDeviceNumaNode(cudaDevice); + mscclpp::numaBind(numaNode); + + const int fifoSize = 128; + const int numTriggers = MIN_TRIGGERS; + const int numParallel = 1; + + auto hostFifo = std::make_unique(fifoSize); + mscclpp::FifoDeviceHandle hostHandle = hostFifo->deviceHandle(); + CUDA_CHECK(cudaMemcpyToSymbol(gFifoPerfDeviceHandle, &hostHandle, sizeof(mscclpp::FifoDeviceHandle))); + + cudaStream_t stream; + CUDA_CHECK(cudaStreamCreate(&stream)); + + // Run kernel + kernelFifoPerfPush<<>>(numTriggers); + CUDA_CHECK(cudaGetLastError()); + + // Process triggers + bool success = consumePerfTriggers(hostFifo, numTriggers, numParallel); + ASSERT_TRUE(success); + + CUDA_CHECK(cudaStreamSynchronize(stream)); + CUDA_CHECK(cudaStreamDestroy(stream)); + CUDA_CHECK(cudaDeviceSynchronize()); +} diff --git a/test/unit/fifo_tests.cu b/test/unit/fifo_tests.cu index b67a220d1..8d30ca5ed 100644 --- a/test/unit/fifo_tests.cu +++ b/test/unit/fifo_tests.cu @@ -1,13 +1,12 @@ // Copyright (c) Microsoft Corporation. -// Licensed under the MIT license. - -#include +// Licensed under the MIT License. #include #include #include #include +#include "../framework.hpp" #include "utils_internal.hpp" #define ITER 10000 // should be larger than the FIFO size for proper testing diff --git a/test/unit/gpu_utils_tests.cc b/test/unit/gpu_utils_tests.cc index f4aba0d75..977314e98 100644 --- a/test/unit/gpu_utils_tests.cc +++ b/test/unit/gpu_utils_tests.cc @@ -1,10 +1,10 @@ // Copyright (c) Microsoft Corporation. -// Licensed under the MIT license. - -#include +// Licensed under the MIT License. #include +#include "../framework.hpp" + TEST(GpuUtilsTest, StreamPool) { auto streamPool = mscclpp::gpuStreamPool(); cudaStream_t s; diff --git a/test/unit/local_channel_tests.cu b/test/unit/local_channel_tests.cu index 50ffc9ea5..699baa385 100644 --- a/test/unit/local_channel_tests.cu +++ b/test/unit/local_channel_tests.cu @@ -1,13 +1,13 @@ // Copyright (c) Microsoft Corporation. -// Licensed under the MIT license. - -#include +// Licensed under the MIT License. #include #include #include #include +#include "../framework.hpp" + #define MAGIC_CONST 777 __constant__ mscclpp::PortChannelDeviceHandle gPortChannel; diff --git a/test/unit/numa_tests.cc b/test/unit/numa_tests.cc index dfa63a74a..46bf5e18b 100644 --- a/test/unit/numa_tests.cc +++ b/test/unit/numa_tests.cc @@ -1,11 +1,11 @@ // Copyright (c) Microsoft Corporation. -// Licensed under the MIT license. - -#include +// Licensed under the MIT License. #include #include +#include "../framework.hpp" + TEST(NumaTest, Basic) { int num; MSCCLPP_CUDATHROW(cudaGetDeviceCount(&num)); diff --git a/test/unit/socket_tests.cc b/test/unit/socket_tests.cc index 1ab592bae..a5598938f 100644 --- a/test/unit/socket_tests.cc +++ b/test/unit/socket_tests.cc @@ -1,11 +1,10 @@ // Copyright (c) Microsoft Corporation. -// Licensed under the MIT license. - -#include +// Licensed under the MIT License. #include #include +#include "../framework.hpp" #include "socket.h" #include "utils_internal.hpp" diff --git a/test/unit/unit_tests_main.cc b/test/unit/unit_tests_main.cc new file mode 100644 index 000000000..397566e05 --- /dev/null +++ b/test/unit/unit_tests_main.cc @@ -0,0 +1,6 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +#include "../framework.hpp" + +int main(int argc, char** argv) { return RUN_ALL_TESTS(); } diff --git a/test/unit/utils_internal_tests.cc b/test/unit/utils_internal_tests.cc index 5479a681a..8526d9feb 100644 --- a/test/unit/utils_internal_tests.cc +++ b/test/unit/utils_internal_tests.cc @@ -1,10 +1,9 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. -#include - #include +#include "../framework.hpp" #include "utils_internal.hpp" TEST(UtilsInternalTest, getHostHash) { diff --git a/test/unit/utils_tests.cc b/test/unit/utils_tests.cc index fa079b306..51562c219 100644 --- a/test/unit/utils_tests.cc +++ b/test/unit/utils_tests.cc @@ -1,12 +1,12 @@ // Copyright (c) Microsoft Corporation. -// Licensed under the MIT license. - -#include +// Licensed under the MIT License. #include #include #include +#include "../framework.hpp" + TEST(UtilsTest, getHostName) { std::string hostname1 = mscclpp::getHostName(1024, '.'); EXPECT_FALSE(hostname1.empty());