From aee7ee18c4e04b5a53c6fa3df62f6e7f79d152fd Mon Sep 17 00:00:00 2001 From: Jacob Merson Date: Sun, 15 Mar 2026 23:12:18 -0400 Subject: [PATCH 1/3] This PR uses pre-built dependencies for asics runner The current CI workflow is very expensive because it rebuilds lots of dependencies from scratch. To reduce this cost, this PR makes use of pre-built dependencies (see #290). The risk of this change is that the dependencies may not be maintained on the SCOREC system, or may become out of date with the rest of the CI infrastructure. However, since multiple folks use these dependencies on SCOREC, they will hopefully be maintained. --- .github/workflows/self-hosted.yml | 191 ++++++------------------------ 1 file changed, 36 insertions(+), 155 deletions(-) diff --git a/.github/workflows/self-hosted.yml b/.github/workflows/self-hosted.yml index 4b2472b7..0b71f624 100644 --- a/.github/workflows/self-hosted.yml +++ b/.github/workflows/self-hosted.yml @@ -23,6 +23,8 @@ jobs: submodules: recursive path: 'pcms_${{ github.event.id }}' #under $GITHUB_WORKSPACE + # note: the modules below match those in https://github.com/LACES-LAB/software-dependencies/blob/main/loads-rhel9.sh + # However, due to the risk of running user-modifiable code, we repeat the modules here rather than sourcing that script. 
- name: setup id: setup shell: bash @@ -36,13 +38,16 @@ jobs: cat << 'EOF' > $workDir/envGcc13.sh #quotes prevent variable expansion in doc contents set +e #avoid exiting when lua modules return non-zero on 'warning' messages source /etc/profile #provides module command - module use /opt/scorec/spack/rhel9/v0222_2/lmod/linux-rhel9-x86_64/Core/ - module load gcc/13.2.0-4eahhas - module load mpich/4.2.3-62uy3hd - module load cmake/3.30.5-5e54py4 - module load cuda/12.6.2-gqq65nw - module load openblas/0.3.28-eubd5ed - export LD_LIBRARY_PATH=/opt/scorec/spack/rhel9/v0222_2/install/linux-rhel9-x86_64_v3/gcc-13.2.0/mpich-4.2.3-62uy3hdwqe75fjfskrrysco5i6gqjblv/lib:$LD_LIBRARY_PATH + module purge + module use /opt/scorec/spack/rhel9/v0201_4/lmod/linux-rhel9-x86_64/Core/ + module load gcc/12.3.0-iil3lno mpich/4.1.1-xpoyz4t cuda/12.1.1-zxa4msk + module load fftw/3.3.10-qqvl57a + module load cmake/3.26.3-xxgzxlv + #module load openblas/0.3.23-wqm7iud + module load netlib-lapack/3.11.0-b22mgwg #netlib-lapack includes blas + module load netlib-scalapack/2.2.0-fzd4jvl + echo "NVCC_WRAPPER_DEFAULT_COMPILER=`which mpicxx`" >> $GITHUB_ENV + echo "PETSC_OPTIONS='-use_gpu_aware_mpi 0'" >> $GITHUB_ENV set -e EOF @@ -52,167 +57,43 @@ jobs: run: | workDir=$PCMS_WORK_DIR source $workDir/envGcc13.sh - - # kokkos - git clone --branch 4.6.01 --depth 1 https://github.com/kokkos/kokkos.git ${workDir}/kokkos - kkbdir=${workDir}/build-kokkos - cmake -S ${workDir}/kokkos -B $kkbdir \ - -DCMAKE_BUILD_TYPE=Release \ - -DCMAKE_INSTALL_PREFIX=$kkbdir/install \ - -DCMAKE_CXX_COMPILER=${workDir}/kokkos/bin/nvcc_wrapper \ - -DKokkos_ARCH_AMPERE80=ON \ - -DKokkos_ENABLE_SERIAL=ON \ - -DKokkos_ENABLE_OPENMP=off \ - -DKokkos_ENABLE_CUDA=on \ - -DKokkos_ENABLE_CUDA_LAMBDA=on \ - -DKokkos_ENABLE_CUDA_CONSTEXPR=on \ - -DKokkos_ENABLE_DEBUG=off - cmake --build $kkbdir --target install -j 4 - echo "KOKKOS_BUILD=$kkbdir" >> $GITHUB_ENV - - # kokkos-kernels - git clone --branch 4.6.01 --depth 1 
https://github.com/kokkos/kokkos-kernels.git ${workDir}/kokkos-kernels - kkkbdir=${workDir}/build-kokkos-kernels - cmake -S ${workDir}/kokkos-kernels -B $kkkbdir \ - -DCMAKE_INSTALL_PREFIX=$kkkbdir/install \ - -DCMAKE_BUILD_TYPE=Release \ - -DKokkos_ROOT=${kkbdir}/install/ \ - -DBUILD_SHARED_LIBS=off - cmake --build $kkkbdir --target install -j 4 - echo "KOKKOS_KERNELS_BUILD=$kkkbdir" >> $GITHUB_ENV - - # adios2 with and without cuda - git clone --branch v2.10.2 https://github.com/ornladios/ADIOS2.git ${workDir}/ADIOS2 - adiosbdir=${workDir}/build-ADIOS2 - cmake -S ${workDir}/ADIOS2 -B $adiosbdir \ - -DCMAKE_INSTALL_PREFIX=$adiosbdir/install \ - -DADIOS2_USE_CUDA=on \ - -DADIOS2_USE_ZFP=off - cmake --build $adiosbdir --target install -j 4 - echo "ADIOS_BUILD=$adiosbdir" >> $GITHUB_ENV - - # perfstubs - git clone https://github.com/UO-OACISS/perfstubs.git ${workDir}/perfstubs - cd ${workDir}/perfstubs - git checkout a1fa3feb1d89214e28047f166500351074b5f0c2 - cd $workDir - psbdir=${workDir}/build-perfstubs - cmake -S ${workDir}/perfstubs -B $psbdir \ - -DCMAKE_INSTALL_PREFIX=$psbdir/install \ - -DCMAKE_CXX_COMPILER=mpicxx - cmake --build $psbdir --target install -j 4 - echo "PERFSTUBS_BUILD=$psbdir" >> $GITHUB_ENV - - # redev - git clone https://github.com/SCOREC/redev.git ${workDir}/redev - cd ${workDir}/redev - git checkout ac09848a5f9b89493e8b679c9080b9efe5538376 - cd $workDir - rdbdir=${workDir}/build-redev - cmake -S ${workDir}/redev -B $rdbdir \ - -DCMAKE_INSTALL_PREFIX=$rdbdir/install \ - -DADIOS2_DIR=$adiosbdir/install/lib64/cmake/adios2 \ - -Dperfstubs_DIR=$psbdir/install/lib/cmake \ - -DCMAKE_CXX_COMPILER=mpicxx \ - -DBUILD_SHARED_LIBS=OFF - cmake --build $rdbdir --target install -j 4 - echo "REDEV_BUILD=$rdbdir" >> $GITHUB_ENV - - # omega_h - git clone https://github.com/SCOREC/omega_h.git ${workDir}/omegah_h - cd omegah_h - git checkout 1765836a00b9a64b8b3791f1442ac52f147e43b2 - cd $workDir - ohbdir=${workDir}/build-omega_h - cmake -S ${workDir}/omegah_h 
-B $ohbdir \ - -DCMAKE_INSTALL_PREFIX=$ohbdir/install \ - -DKokkos_DIR=${kkbdir}/install/lib64/cmake/Kokkos \ - -DCMAKE_BUILD_TYPE=Release \ - -DBUILD_SHARED_LIBS=off \ - -DOmega_h_USE_Kokkos=ON \ - -DOmega_h_USE_CUDA=on \ - -DOmega_h_CUDA_ARCH=80 \ - -DOmega_h_USE_MPI=on \ - -DMPIEXEC_EXECUTABLE=mpirun \ - -DBUILD_TESTING=off \ - -DCMAKE_C_COMPILER=mpicc \ - -DCMAKE_CXX_COMPILER=mpicxx - cmake --build $ohbdir --target install -j 4 - echo "OMEGA_H_BUILD=$ohbdir" >> $GITHUB_ENV - - # meshfields - git clone https://github.com/SCOREC/meshFields.git ${workDir}/meshfields - cd ${workDir}/meshfields - git checkout b1482bbba288df210784b2345eae08e34faabdc4 - cd $workDir - mfbdir=${workDir}/build-meshfields - cmake -S ${workDir}/meshfields -B $mfbdir \ - -DCMAKE_INSTALL_PREFIX=$mfbdir/install \ - -DOmega_h_DIR=$ohbdir/install/lib64/cmake/Omega_h \ - -DKokkos_DIR=${kkbdir}/install/lib64/cmake/Kokkos \ - -DCMAKE_CXX_COMPILER=mpicxx \ - -DCMAKE_C_COMPILER=mpicc \ - -DMPIEXEC_EXECUTABLE=mpirun - cmake --build $mfbdir --target install -j 4 - echo "MESHFIELDS_BUILD=$mfbdir" >> $GITHUB_ENV - - # catch2 - git clone --branch v3.11.0 https://github.com/catchorg/Catch2.git ${workDir}/Catch2 - c2bdir=${workDir}/build-Catch2 - cmake -S ${workDir}/Catch2 -B $c2bdir \ - -DCMAKE_INSTALL_PREFIX=$c2bdir/install - cmake --build $c2bdir --target install -j 4 - echo "CATCH2_BUILD=$c2bdir" >> $GITHUB_ENV - - # petsc - git clone --branch v3.24.2 https://gitlab.com/petsc/petsc.git ${workDir}/petsc - cd ${workDir}/petsc - ./configure \ - PETSC_ARCH=cuda-kokkos \ - --with-kokkos-dir=$kkbdir/install/ \ - --with-kokkos-kernels-dir=$kkkbdir/install/ \ - --with-cuda=1 \ - --with-shared-libraries=0 \ - --with-openblas-dir="${OPENBLAS_RHEL9_ROOT}" - make all check - cd $workDir - echo "PETSC_BUILD=${workDir}/petsc/cuda-kokkos" >> $GITHUB_ENV - - git clone https://github.com/jacobmerson/pcms_testcases.git ${workDir}/pcms_testcases - - # pcms - export PETSC_OPTIONS="-use_gpu_aware_mpi 0" 
bdir=${workDir}/build-pcms + + # PCMS dependencies are built with https://github.com/LACES-LAB/software-dependencies/blob/main/build-deps-rhel9.sh + # Built artifacts are located in the $DEPENDENCY_DIR + DEPENDENCY_DIR=/users/mersoj2/laces-software/build/ + DEVICE_ARCH=AMPERE80 cmake -S ${{github.workspace}}/pcms_${{ github.event.id }} -B $bdir \ - -DCMAKE_BUILD_TYPE=Debug \ - -DCMAKE_C_COMPILER=mpicc \ - -DCMAKE_CXX_COMPILER=mpicxx \ + -DCMAKE_CXX_COMPILER=`which mpicxx` \ + -DCMAKE_C_COMPILER=`which mpicc` \ + -DCMAKE_Fortran_COMPILER=`which mpifort`\ -DPCMS_TIMEOUT=20 \ - -DPCMS_ENABLE_SPDLOG=OFF \ - -DPCMS_ENABLE_PETSC=ON \ - -DPETSC_LINK_STATIC=ON \ - -DPETSC_DIR=${workDir}/petsc \ - -DPETSC_ARCH=cuda-kokkos \ - -Dredev_DIR=$rdbdir/install/lib64/cmake/redev/ \ - -DOmega_h_DIR=$ohbdir/install/lib64/cmake/Omega_h/ \ - -Dperfstubs_DIR=$psbdir/install/lib/cmake/ \ - -DADIOS2_DIR=$adiosbdir/install/lib64/cmake/adios2/ \ - -DCatch2_DIR=$c2bdir/install/lib64/cmake/Catch2/ \ - -DKokkos_DIR=$kkbdir/install/lib64/cmake/Kokkos/ \ - -DKokkosKernels_DIR=$kkkbdir/install/lib64/cmake/KokkosKernels/ \ - -Dmeshfields_DIR=$mfbdir/install/lib64/cmake/meshfields/ \ + -DCMAKE_BUILD_TYPE=Release \ + -DCatch2_DIR=$DEPENDENCY_DIR/Catch2/install/lib64/cmake/Catch2/ \ + -Dmeshfields_DIR=$DEPENDENCY_DIR/${DEVICE_ARCH}/meshFields/install/lib64/cmake/meshfields \ + -DOmega_h_DIR=$DEPENDENCY_DIR/${DEVICE_ARCH}/omega_h/install/lib64/cmake/Omega_h/ \ + -Dredev_DIR=$DEPENDENCY_DIR/${DEVICE_ARCH}/redev/install/lib64/cmake/redev/ \ -DPCMS_TEST_DATA_DIR=${workDir}/pcms_testcases/ \ - -DCMAKE_CXX_EXTENSIONS=Off + -DMPIEXEC_EXECUTABLE=`which mpirun` \ + -DADIOS2_DIR=$DEPENDENCY_DIR/adios2/install/lib64/cmake/adios2/ \ + -Dperfstubs_DIR=$DEPENDENCY_DIR/perfstubs/install/lib/cmake/ \ + -DKokkos_DIR=$DEPENDENCY_DIR/${DEVICE_ARCH}/kokkos/install/lib64/cmake/Kokkos/ \ + -DKokkosKernels_DIR=$DEPENDENCY_DIR/${DEVICE_ARCH}/kokkos-kernels/install/lib64/cmake/KokkosKernels/ \ + -DBUILD_TESTING=ON \ + 
-DPETSC_ARCH="" \ + -DPETSC_DIR=$DEPENDENCY_DIR/${DEVICE_ARCH}/petsc/install \ + -DPCMS_ENABLE_SPDLOG=OFF + cmake --build $bdir ctest --test-dir $bdir --output-on-failure + - name: Save Result Link if: ${{ !cancelled() }} #prepare report unless the job was cancelled run: | mkdir -p ./pr echo "${{ github.event.id }}" > ./pr/issueNumber echo "Test Results:" > ./pr/message - echo "- Kokkos CUDA: ${{ steps.build_pcms_kokkos_cuda.outcome }}" >> ./pr/message echo "" >> ./pr/message echo "[(details)](https://github.com/${{github.repository}}/actions/runs/${{ github.run_id }})" >> ./pr/message From bfccee43425b6586daecee1d33d681508c37cd58 Mon Sep 17 00:00:00 2001 From: Jacob Merson Date: Sun, 15 Mar 2026 22:57:17 -0400 Subject: [PATCH 2/3] debug outputs --- .github/workflows/self-hosted.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/self-hosted.yml b/.github/workflows/self-hosted.yml index 0b71f624..a699d78a 100644 --- a/.github/workflows/self-hosted.yml +++ b/.github/workflows/self-hosted.yml @@ -63,6 +63,11 @@ jobs: # Built artifacts are located in the $DEPENDENCY_DIR DEPENDENCY_DIR=/users/mersoj2/laces-software/build/ DEVICE_ARCH=AMPERE80 + # debug output, check if this directory is readable + ls $DEPENDENCY_DIR/${DEVICE_ARCH} + ls $DEPENDENCY_DIR/${DEVICE_ARCH}/redev/install/lib64/cmake/redev/ + echo $DEPENDENCY_DIR/${DEVICE_ARCH}/redev/install/lib64/cmake/redev/ + cmake -S ${{github.workspace}}/pcms_${{ github.event.id }} -B $bdir \ -DCMAKE_CXX_COMPILER=`which mpicxx` \ -DCMAKE_C_COMPILER=`which mpicc` \ From cf5af2d5303bcda96a54173f1a50a97ac7bef110 Mon Sep 17 00:00:00 2001 From: Jacob Merson Date: Sun, 15 Mar 2026 23:41:51 -0400 Subject: [PATCH 3/3] add mistakenly removed checkout of testcases repo --- .github/workflows/self-hosted.yml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/workflows/self-hosted.yml b/.github/workflows/self-hosted.yml index a699d78a..99165b18 100644 --- 
a/.github/workflows/self-hosted.yml +++ b/.github/workflows/self-hosted.yml @@ -46,8 +46,6 @@ jobs: #module load openblas/0.3.23-wqm7iud module load netlib-lapack/3.11.0-b22mgwg #netlib-lapack includes blas module load netlib-scalapack/2.2.0-fzd4jvl - echo "NVCC_WRAPPER_DEFAULT_COMPILER=`which mpicxx`" >> $GITHUB_ENV - echo "PETSC_OPTIONS='-use_gpu_aware_mpi 0'" >> $GITHUB_ENV set -e EOF @@ -59,6 +57,10 @@ jobs: source $workDir/envGcc13.sh bdir=${workDir}/build-pcms + export NVCC_WRAPPER_DEFAULT_COMPILER=`which mpicxx` + export PETSC_OPTIONS='-use_gpu_aware_mpi 0' + + git clone https://github.com/jacobmerson/pcms_testcases.git ${workDir}/pcms_testcases # PCMS dependencies are built with https://github.com/LACES-LAB/software-dependencies/blob/main/build-deps-rhel9.sh # Built artifacts are located in the $DEPENDENCY_DIR DEPENDENCY_DIR=/users/mersoj2/laces-software/build/ @@ -89,7 +91,7 @@ jobs: -DPETSC_DIR=$DEPENDENCY_DIR/${DEVICE_ARCH}/petsc/install \ -DPCMS_ENABLE_SPDLOG=OFF - cmake --build $bdir + cmake --build $bdir -j 4 ctest --test-dir $bdir --output-on-failure