diff --git a/babs/bootstrap.py b/babs/bootstrap.py index d8af8d09..de9a1d07 100644 --- a/babs/bootstrap.py +++ b/babs/bootstrap.py @@ -399,18 +399,25 @@ def _bootstrap_single_app_scripts( """Bootstrap scripts for single BIDS app configuration.""" container = Container(container_ds, container_name, container_config) - # Generate `_zip.sh`: ---------------------------------- - # which is a bash script of singularity run + zip - # in folder: `analysis/code` - print('\nGenerating a bash script for running container and zipping the outputs...') - print('This bash script will be named as `' + container_name + '_zip.sh`') - bash_path = op.join(self.analysis_path, 'code', container_name + '_zip.sh') + # Generate `_run.sh`: ---------------------------------- + print('\nGenerating run script: ' + container_name + '_run.sh') + bash_path = op.join(self.analysis_path, 'code', container_name + '_run.sh') container.generate_bash_run_bidsapp(bash_path, self.input_datasets, self.processing_level) self.datalad_save( - path='code/' + container_name + '_zip.sh', + path='code/' + container_name + '_run.sh', message='Generate script of running container', ) + # Generate `_zip.sh` if zipping is enabled: --------------- + if self.zip_foldernames: + print('Generating zip script: ' + container_name + '_zip.sh') + bash_path = op.join(self.analysis_path, 'code', container_name + '_zip.sh') + container.generate_bash_zip_outputs(bash_path, self.processing_level) + self.datalad_save( + path='code/' + container_name + '_zip.sh', + message='Generate zip script', + ) + # make another folder within `code` for test jobs: os.makedirs(op.join(self.analysis_path, 'code/check_setup'), exist_ok=True) diff --git a/babs/check_setup.py b/babs/check_setup.py index 86bf9dbd..d332865c 100644 --- a/babs/check_setup.py +++ b/babs/check_setup.py @@ -156,7 +156,7 @@ def babs_check_setup(self, submit_a_test_job): else: list_files_code = [ 'babs_proj_config.yaml', - container_name + '_zip.sh', + container_name + '_run.sh', 'participant_job.sh', 'submit_job_template.yaml', ] diff --git a/babs/container.py b/babs/container.py index f3296286..ef0fee6c 100644 --- a/babs/container.py +++ b/babs/container.py @@ -6,8 +6,9 @@ import yaml from jinja2 import Environment, PackageLoader, StrictUndefined -from babs.generate_bidsapp_runscript import generate_bidsapp_runscript +from babs.generate_bidsapp_runscript import generate_bidsapp_runscript, get_output_zipping_cmds from babs.generate_submit_script import generate_submit_script, generate_test_submit_script +from babs.constants import OUTPUT_MAIN_FOLDERNAME from babs.utils import app_output_settings_from_config @@ -145,6 +146,29 @@ def generate_bash_run_bidsapp(self, bash_path, input_ds, processing_level): print('Below is the generated BIDS App run script:') print(script_content) + def generate_bash_zip_outputs(self, bash_path, processing_level): + """Generate a standalone bash script that zips BIDS App outputs.""" + dict_zip_foldernames, _ = app_output_settings_from_config(self.config) + cmd_zip = get_output_zipping_cmds(dict_zip_foldernames, processing_level) + + env = Environment( + loader=PackageLoader('babs', 'templates'), + trim_blocks=True, + lstrip_blocks=True, + autoescape=False, + undefined=StrictUndefined, + ) + template = env.get_template('zip_outputs.sh.jinja2') + script_content = template.render( + processing_level=processing_level, + cmd_zip=cmd_zip, + OUTPUT_MAIN_FOLDERNAME=OUTPUT_MAIN_FOLDERNAME, + ) + + with open(bash_path, 'w') as f: + f.write(script_content) + os.chmod(bash_path, 0o700) + def generate_bash_participant_job( self, bash_path, input_ds, processing_level, system, project_root=None ): @@ -173,7 +197,7 @@ def generate_bash_participant_job( input_datasets=input_ds.as_records(), processing_level=processing_level, container_name=self.container_name, - zip_foldernames=self.config['zip_foldernames'], + zip_foldernames=self.config.get('zip_foldernames', None), project_root=project_root, ) diff --git a/babs/generate_bidsapp_runscript.py b/babs/generate_bidsapp_runscript.py index a24db5fc..680f58b3 100644 --- a/babs/generate_bidsapp_runscript.py +++ b/babs/generate_bidsapp_runscript.py @@ -73,9 +73,6 @@ def generate_bidsapp_runscript( # Get unzip commands for any zipped input datasets cmd_unzip_inputds = get_input_unzipping_cmds(input_datasets) - # Generate zip command - cmd_zip = get_output_zipping_cmds(dict_zip_foldernames, processing_level) - # Render the template env = Environment( loader=PackageLoader('babs', 'templates'), @@ -101,8 +98,6 @@ def generate_bidsapp_runscript( bids_app_input_dir=bids_app_input_dir, bids_app_output_dir=bids_app_output_dir, bids_app_args=bids_app_args, - cmd_zip=cmd_zip, - OUTPUT_MAIN_FOLDERNAME=OUTPUT_MAIN_FOLDERNAME, singularity_flags=singularity_args, subject_selection_flag=subject_selection_flag, ) diff --git a/babs/generate_submit_script.py b/babs/generate_submit_script.py index c586619d..ab1fc0fc 100644 --- a/babs/generate_submit_script.py +++ b/babs/generate_submit_script.py @@ -6,6 +6,8 @@ import yaml from jinja2 import Environment, PackageLoader, StrictUndefined +from babs.constants import OUTPUT_MAIN_FOLDERNAME + # Multiple scheduler system handling DIRECTIVE_PREFIX = { 'sge': '#$', @@ -122,6 +124,7 @@ def generate_submit_script( container_images=container_images, datalad_run_message=datalad_run_message, project_root=project_root, + OUTPUT_MAIN_FOLDERNAME=OUTPUT_MAIN_FOLDERNAME, ) diff --git a/babs/templates/bidsapp_run.sh.jinja2 b/babs/templates/bidsapp_run.sh.jinja2 index 12b5da0e..f7d40eb9 100644 --- a/babs/templates/bidsapp_run.sh.jinja2 +++ b/babs/templates/bidsapp_run.sh.jinja2 @@ -3,6 +3,7 @@ set -e -u -x subid="$1" {% if processing_level == 'session' %} +# shellcheck disable=SC2034 # not always used in generated script sesid="$2" {% endif %} {% for input_ds in input_datasets|selectattr('is_zipped') %} @@ -70,8 +71,7 @@ singularity run \ {% endfor %} {{ subject_selection_flag }} "${subid}" -{{ cmd_zip }} -rm -rf {{ OUTPUT_MAIN_FOLDERNAME }} .git/tmp/wkdir +rm -rf .git/tmp/wkdir {% if flag_filterfile %} rm -f "${filterfile}" {% endif %} \ No newline at end of file diff --git a/babs/templates/participant_job.sh.jinja2 b/babs/templates/participant_job.sh.jinja2 index c6051411..d4c6ea75 100644 --- a/babs/templates/participant_job.sh.jinja2 +++ b/babs/templates/participant_job.sh.jinja2 @@ -125,9 +125,9 @@ if [ ! -L "${CONTAINER_JOB}" ]; then exit 1 fi -# datalad run: +# datalad run — execute container: datalad run \ - -i "{{ run_script_relpath if run_script_relpath else 'code/' + container_name + '_zip.sh' }}" \ + -i "{{ run_script_relpath if run_script_relpath else 'code/' + container_name + '_run.sh' }}" \ {% for input_dataset in input_datasets %} {% if not input_dataset['is_zipped'] %} -i "{{ input_dataset['unzipped_path_containing_subject_dirs'] }}/{% raw %}${subid}{% endraw %}{% if processing_level == 'session' %}/{% raw %}${sesid}{% endraw %}{% endif %}" \ @@ -147,13 +147,21 @@ datalad run \ --expand inputs \ {% endif %} --explicit \ -{% if zip_foldernames is not none %} + -o "{{ OUTPUT_MAIN_FOLDERNAME }}/" \ + -m "{{ (datalad_run_message if datalad_run_message is defined and datalad_run_message else container_name) }} {% raw %}${subid}{% endraw %}{% if processing_level == 'session' %} {% raw %}${sesid}{% endraw %}{% endif %}" \ + "bash ./{{ run_script_relpath if run_script_relpath else 'code/' + container_name + '_run.sh' }} {% raw %}${subid}{% endraw %} {% if processing_level == 'session' %} {% raw %}${sesid}{% endraw %}{% endif %}{% for input_dataset in input_datasets %}{% if input_dataset['is_zipped'] %} ${%raw%}{{%endraw%}{{ input_dataset['name'].upper() }}_ZIP{%raw%}}{%endraw%}{%endif%}{%endfor%}" + +{% if zip_foldernames and not run_script_relpath %} +# datalad run — zip outputs: +datalad run \ + -i "code/{{ container_name }}_zip.sh" \ + --explicit \ {% for key, value in zip_foldernames.items() %} -o "{% raw %}${subid}{% endraw %}{% if processing_level == 'session' %}_{% raw %}${sesid}{% endraw %}{% endif %}_{{ key }}-{{ value }}.zip" \ {% endfor %} + -m "zip {% raw %}${subid}{% endraw %}{% if processing_level == 'session' %} {% raw %}${sesid}{% endraw %}{% endif %}" \ + "bash ./code/{{ container_name }}_zip.sh {% raw %}${subid}{% endraw %}{% if processing_level == 'session' %} {% raw %}${sesid}{% endraw %}{% endif %}" {% endif %} - -m "{{ (datalad_run_message if datalad_run_message is defined and datalad_run_message else container_name) }} {% raw %}${subid}{% endraw %}{% if processing_level == 'session' %} {% raw %}${sesid}{% endraw %}{% endif %}" \ - "bash ./{{ run_script_relpath if run_script_relpath else 'code/' + container_name + '_zip.sh' }} {% raw %}${subid}{% endraw %} {% if processing_level == 'session' %} {% raw %}${sesid}{% endraw %}{% endif %}{% for input_dataset in input_datasets %}{% if input_dataset['is_zipped'] %} ${%raw%}{{%endraw%}{{ input_dataset['name'].upper() }}_ZIP{%raw%}}{%endraw%}{%endif%}{%endfor%}" # Finish up: # push result file content to output RIA storage: diff --git a/babs/templates/zip_outputs.sh.jinja2 b/babs/templates/zip_outputs.sh.jinja2 new file mode 100644 index 00000000..7f7f4864 --- /dev/null +++ b/babs/templates/zip_outputs.sh.jinja2 @@ -0,0 +1,10 @@ +#!/bin/bash +set -e -u -x + +subid="$1" +{% if processing_level == 'session' %} +sesid="$2" +{% endif %} + +{{ cmd_zip }} +rm -rf {{ OUTPUT_MAIN_FOLDERNAME }} diff --git a/babs/utils.py b/babs/utils.py index 71a245f7..238a2205 100644 --- a/babs/utils.py +++ b/babs/utils.py @@ -204,12 +204,10 @@ def app_output_settings_from_config(config): # By default, the output folder is `outputs`: bids_app_output_dir = OUTPUT_MAIN_FOLDERNAME - # Sanity check: this section should exist: - if 'zip_foldernames' not in config: - raise Exception( - 'The `container_config` does not contain' - ' the section `zip_foldernames`. Please add this section!' - ) + # TODO: consider nesting zip options under an "output_zip" section + # zip_foldernames is optional — missing or empty means no zipping + if not config.get('zip_foldernames'): + return None, bids_app_output_dir # Check if placeholder to make a sub-folder in `outputs` folder create_output_dir_for_single_zip = config.get('all_results_in_one_zip', None) diff --git a/tests/e2e-slurm/container/config_simbids_no_zip.yaml b/tests/e2e-slurm/container/config_simbids_no_zip.yaml new file mode 100644 index 00000000..9f352f2b --- /dev/null +++ b/tests/e2e-slurm/container/config_simbids_no_zip.yaml @@ -0,0 +1,34 @@ +input_datasets: + BIDS: + required_files: + - "anat/*_T1w.nii*" + is_zipped: false + origin_url: "/test-temp/simbids" + path_in_babs: inputs/data/BIDS + +bids_app_args: + --bids-app: "fmriprep" + $SUBJECT_SELECTION_FLAG: "--participant-label" + --stop-on-first-crash: "" + -vv: "" + --anat-only: "" + +singularity_args: + - --containall + - --writable-tmpfs + +cluster_resources: + interpreting_shell: "/bin/bash" + +script_preamble: | + PATH=/opt/conda/envs/babs/bin:$PATH + +job_compute_space: "/tmp" + +alert_log_messages: + stdout: + - "Excessive topologic defect encountered" + - "Cannot allocate memory" + - "mris_curvature_stats: Could not open file" + - "Numerical result out of range" + - "fMRIPrep failed" diff --git a/tests/e2e-slurm/container/walkthrough-tests.sh b/tests/e2e-slurm/container/walkthrough-tests.sh index c8947af8..ec98852d 100755 --- a/tests/e2e-slurm/container/walkthrough-tests.sh +++ b/tests/e2e-slurm/container/walkthrough-tests.sh @@ -145,3 +145,87 @@ done babs status babs merge + +popd +# Create a third BABS project with no zipping (loose output files) +TEST3_NAME=no_zip_test +babs init \ + --container_ds "${PWD}"/simbids-container \ + --container_name simbids-0-0-3 \ + --container_config "/tests/tests/e2e-slurm/container/config_simbids_no_zip.yaml" \ + --processing_level subject \ + --queue slurm \ + --keep-if-failed \ + "${PWD}/${TEST3_NAME}" + +echo "PASSED: babs init (no zip)" + +pushd "${PWD}/${TEST3_NAME}/analysis" +datalad get "containers/.datalad/environments/simbids-0-0-3/image" +popd + +pushd "${PWD}/${TEST3_NAME}" + +babs check-setup + +babs submit +while [[ -n $(squeue -u "$USER" -t RUNNING,PENDING --noheader) ]]; do + squeue -u "$USER" -t RUNNING,PENDING + echo "Waiting for running jobs to finish..." + sleep 5 +done + +babs status + +sacct -u "$USER" +if sacct -u "$USER" --noheader | grep -q "FAILED"; then + echo "=========================================================================" + echo "There are failed jobs in no-zip test." + LOGS_DIR="analysis/logs" + if [ -d "$LOGS_DIR" ]; then + for f in "$LOGS_DIR"/*; do + [ -f "$f" ] && echo "---------- $f ----------" && cat "$f" + done + fi + exit 1 +fi +echo "PASSED: No failed jobs (no-zip)" + +babs merge + +# After merge, verify outputs in the output RIA are loose files, not zips +RIA_PATH=$(find "${PWD}/output_ria" -mindepth 2 -maxdepth 2 -type d | head -1) +echo "=========================================================================" +echo "Top-level entries in output RIA after merge:" +git -C "$RIA_PATH" ls-tree --name-only HEAD +echo "=========================================================================" + +if git -C "$RIA_PATH" ls-tree --name-only HEAD | grep -q '\.zip'; then + echo "FAILED: found .zip files in no-zip project" + exit 1 +fi +echo "PASSED: no .zip files in output RIA (as expected)" + +if ! git -C "$RIA_PATH" ls-tree --name-only HEAD | grep -q 'outputs'; then + echo "FAILED: no outputs directory found in output RIA" + exit 1 +fi +echo "PASSED: outputs/ directory found in output RIA" + +if ! git -C "$RIA_PATH" ls-tree -r --name-only HEAD -- outputs | grep -q 'dataset_description.json'; then + echo "FAILED: missing dataset_description.json in outputs" + exit 1 +fi + +if ! git -C "$RIA_PATH" ls-tree -r --name-only HEAD -- outputs | grep -q 'sub-0001'; then + echo "FAILED: missing sub-0001 results in outputs" + exit 1 +fi + +if ! git -C "$RIA_PATH" ls-tree -r --name-only HEAD -- outputs | grep -q 'sub-0002'; then + echo "FAILED: missing sub-0002 results in outputs" + exit 1 +fi +echo "PASSED: output files present for both subjects" + +echo "PASSED: e2e no-zip walkthrough successful!" diff --git a/tests/pytest_in_docker.sh b/tests/pytest_in_docker.sh index 4251d601..78584ca6 100755 --- a/tests/pytest_in_docker.sh +++ b/tests/pytest_in_docker.sh @@ -1,14 +1,33 @@ #!/bin/bash +# Usage: +# ./tests/pytest_in_docker.sh # run all tests +# ./tests/pytest_in_docker.sh -sv tests/test_status.py # run specific tests +set -eu +if [ $# -gt 0 ]; then + ARGS=("$@") +else + ARGS=(-svx --pdb /babs/tests/) +fi + +# In a worktree, .git is a file pointing to the main repo's object store. +# Mount the real .git dir so setuptools-scm can resolve the version. +GIT_COMMON_DIR="$(git rev-parse --git-common-dir 2>/dev/null || true)" +EXTRA_MOUNT=() +if [ -n "${GIT_COMMON_DIR}" ] && [ "${GIT_COMMON_DIR}" != ".git" ]; then + REAL_GIT_DIR="$(cd "${GIT_COMMON_DIR}" && pwd)" + EXTRA_MOUNT=(-v "${REAL_GIT_DIR}:${REAL_GIT_DIR}") +fi + docker run -it \ --platform linux/amd64 \ -h slurmctl --cap-add sys_admin \ --privileged \ -v "$(pwd)":/babs \ + "${EXTRA_MOUNT[@]}" \ -w /babs \ pennlinc/slurm-docker-ci:0.14 \ - bash -c "pip install -e .[tests] && pytest -svx \ + bash -c "pip install -e .[tests] && pytest \ --cov-report=term-missing \ --cov-report=xml:/tmp/coverage.xml \ --cov=babs \ - --pdb \ - /babs/tests/" + ${ARGS[*]}" diff --git a/tests/test_generate_bidsapp_runscript.py b/tests/test_generate_bidsapp_runscript.py index 19a34402..725fb04c 100644 --- a/tests/test_generate_bidsapp_runscript.py +++ b/tests/test_generate_bidsapp_runscript.py @@ -3,10 +3,14 @@ import pytest +from jinja2 import Environment, PackageLoader, StrictUndefined + +from babs.constants import OUTPUT_MAIN_FOLDERNAME from babs.generate_bidsapp_runscript import ( generate_bidsapp_runscript, generate_pipeline_runscript, get_input_unzipping_cmds, + get_output_zipping_cmds, ) from babs.utils import ( app_output_settings_from_config, @@ -164,6 +168,66 @@ def run_shellcheck(script_path): return False, str(e) +@pytest.mark.parametrize('processing_level', ['subject', 'session']) +def test_generate_bidsapp_runscript_no_zip(processing_level, tmp_path): + """Test that the run script works when zip_foldernames is absent.""" + script_content = generate_bidsapp_runscript( + input_datasets_prep, + processing_level, + container_name='toybidsapp-0-0-7', + relative_container_path='containers/.datalad/containers/toybidsapp-0-0-7/image', + bids_app_output_dir='outputs', + dict_zip_foldernames=None, + bids_app_args='', + singularity_args=['--containall'], + templateflow_home=None, + ) + + assert '7z' not in script_content + assert 'rm -rf outputs' not in script_content + assert 'singularity run' in script_content + + out_fn = tmp_path / f'no_zip_{processing_level}.sh' + with open(out_fn, 'w') as f: + f.write(script_content) + passed, status = run_shellcheck(str(out_fn)) + if not passed: + print(script_content) + assert passed, status + + +@pytest.mark.parametrize('processing_level', ['subject', 'session']) +def test_generate_zip_outputs_script(processing_level, tmp_path): + """Test that the standalone zip script is generated correctly.""" + dict_zip_foldernames = {'fmriprep_anat': '24-1-1'} + cmd_zip = get_output_zipping_cmds(dict_zip_foldernames, processing_level) + + env = Environment( + loader=PackageLoader('babs', 'templates'), + trim_blocks=True, + lstrip_blocks=True, + autoescape=False, + undefined=StrictUndefined, + ) + template = env.get_template('zip_outputs.sh.jinja2') + script_content = template.render( + processing_level=processing_level, + cmd_zip=cmd_zip, + OUTPUT_MAIN_FOLDERNAME=OUTPUT_MAIN_FOLDERNAME, + ) + + assert '7z' in script_content + assert 'rm -rf outputs' in script_content + + out_fn = tmp_path / f'zip_outputs_{processing_level}.sh' + with open(out_fn, 'w') as f: + f.write(script_content) + passed, status = run_shellcheck(str(out_fn)) + if not passed: + print(script_content) + assert passed, status + + def test_generate_pipeline_runscript(tmp_path): """Test that the pipeline runscript is generated correctly.""" config_path = NOTEBOOKS_DIR / 'eg_nordic-fmriprep_pipeline.yaml' diff --git a/tests/test_generate_submit_script.py b/tests/test_generate_submit_script.py index 62184ef0..2eace80b 100644 --- a/tests/test_generate_submit_script.py +++ b/tests/test_generate_submit_script.py @@ -141,6 +141,66 @@ def run_shellcheck(script_path): return False, str(e) +@pytest.mark.parametrize('processing_level', ['subject', 'session']) +def test_generate_submit_script_no_zip(processing_level, tmp_path): + """Test participant_job.sh with no zipping: one datalad run, -o outputs/.""" + config_path = NOTEBOOKS_DIR / 'eg_toybidsapp-0-0-7_rawBIDS-walkthrough.yaml' + config = read_yaml(config_path) + script_content = generate_submit_script( + queue_system='slurm', + cluster_resources_config=config['cluster_resources'], + script_preamble=config['script_preamble'], + job_scratch_directory=config['job_compute_space'], + input_datasets=input_datasets_prep, + processing_level=processing_level, + container_name='toybidsapp-0-0-7', + zip_foldernames=None, + ) + + assert script_content.count('datalad run') == 1 + assert '-o "outputs/"' in script_content + assert '.zip' not in script_content + assert '_zip.sh' not in script_content + + out_fn = tmp_path / f'participant_job_no_zip_{processing_level}.sh' + with open(out_fn, 'w') as f: + f.write(script_content) + passed, status = run_shellcheck(str(out_fn)) + if not passed: + print(script_content) + assert passed, status + + +@pytest.mark.parametrize('processing_level', ['subject', 'session']) +def test_generate_submit_script_with_zip(processing_level, tmp_path): + """Test participant_job.sh with zipping: two datalad run blocks.""" + config_path = NOTEBOOKS_DIR / 'eg_toybidsapp-0-0-7_rawBIDS-walkthrough.yaml' + config = read_yaml(config_path) + script_content = generate_submit_script( + queue_system='slurm', + cluster_resources_config=config['cluster_resources'], + script_preamble=config['script_preamble'], + job_scratch_directory=config['job_compute_space'], + input_datasets=input_datasets_prep, + processing_level=processing_level, + container_name='toybidsapp-0-0-7', + zip_foldernames=config['zip_foldernames'], + ) + + assert script_content.count('datalad run') == 2 + assert '-o "outputs/"' in script_content + assert '_zip.sh' in script_content + assert '.zip"' in script_content + + out_fn = tmp_path / f'participant_job_with_zip_{processing_level}.sh' + with open(out_fn, 'w') as f: + f.write(script_content) + passed, status = run_shellcheck(str(out_fn)) + if not passed: + print(script_content) + assert passed, status + + def test_generate_submit_script_pipeline(tmp_path): """Test submit script generation for pipeline configuration.""" # Use same pattern as single-app tests: read from existing YAML config diff --git a/tests/test_utils.py b/tests/test_utils.py index 9a3314b8..e23d8e49 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -105,11 +105,17 @@ def test_app_output_settings_from_config(): assert result_single == single_zip_config['zip_foldernames'] assert output_dir_single == 'outputs/output1' - # Test with empty zip_foldernames (should raise exception) + # Test with empty zip_foldernames (no zipping) empty_config = {'zip_foldernames': {}} - - with pytest.raises(Exception, match='No output folder name provided'): - app_output_settings_from_config(empty_config) + result_empty, output_dir_empty = app_output_settings_from_config(empty_config) + assert result_empty is None + assert output_dir_empty == 'outputs' + + # Test with missing zip_foldernames (no zipping) + missing_config = {} + result_missing, output_dir_missing = app_output_settings_from_config(missing_config) + assert result_missing is None + assert output_dir_missing == 'outputs' # Test with multiple foldernames and all_results_in_one_zip (should raise exception) multiple_folders_config = {