diff --git a/python/lib/eeg.py b/python/lib/eeg.py index 2f32cc80f..8e88d48bb 100644 --- a/python/lib/eeg.py +++ b/python/lib/eeg.py @@ -20,7 +20,12 @@ from lib.db.models.session import DbSession from lib.db.queries.physio_file import try_get_physio_file_with_path from lib.env import Env -from lib.import_bids_dataset.copy_files import copy_scans_tsv_file_to_loris_bids_dir +from lib.import_bids_dataset.copy_files import ( + copy_loris_bids_file, + copy_scans_tsv_file_to_loris_bids_dir, + get_loris_bids_file_path, +) +from lib.import_bids_dataset.env import BidsImportEnv from lib.import_bids_dataset.file_type import get_check_bids_imaging_file_type_from_extension from lib.import_bids_dataset.physio import ( get_check_bids_physio_file_hash, @@ -40,9 +45,8 @@ class Eeg: into the database by calling the lib.physiological class. """ - def __init__(self, env: Env, bids_layout, bids_info: BidsDataTypeInfo, session: DbSession, db, - data_dir, loris_bids_eeg_rel_dir, - loris_bids_root_dir, dataset_tag_dict, dataset_type): + def __init__(self, env: Env, import_env: BidsImportEnv, bids_layout, bids_info: BidsDataTypeInfo, + session: DbSession, db, dataset_tag_dict, dataset_type): """ Constructor method for the Eeg class. @@ -52,12 +56,7 @@ def __init__(self, env: Env, bids_layout, bids_info: BidsDataTypeInfo, session: :param session : The LORIS session the EEG datasets are linked to :param db : Database class object :type db : object - :param data_dir : LORIS data directory path (usually /data/PROJECT/data) - :type data_dir : str - :param loris_bids_eeg_rel_dir: LORIS BIDS EEG relative dir path to data_dir - :type loris_bids_eeg_rel_dir: str - :param loris_bids_root_dir : LORIS BIDS root directory path - :type loris_bids_root_dir : str + :param info : The BIDS import pipeline information :param dataset_tag_dict : Dict of dataset-inherited HED tags :type dataset_tag_dict : dict :param dataset_type : raw | derivative. 
Type of the dataset @@ -71,9 +70,8 @@ def __init__(self, env: Env, bids_layout, bids_info: BidsDataTypeInfo, session: # load the LORIS BIDS import root directory where the eeg files will # be copied - self.loris_bids_eeg_rel_dir = loris_bids_eeg_rel_dir - self.loris_bids_root_dir = loris_bids_root_dir - self.data_dir = data_dir + self.info = import_env + self.data_dir = self.info.data_dir_path # load bids subject, visit and modality self.bids_info = bids_info @@ -294,14 +292,9 @@ def fetch_and_insert_eeg_files(self, derivatives=False, detect=True): if sidecar_json is not None: eeg_file_data = sidecar_json.data - sidecar_json_path = os.path.relpath(sidecar_json.path, self.data_dir) - if self.loris_bids_root_dir: - # copy the JSON file to the LORIS BIDS import directory - sidecar_json_path = self.copy_file_to_loris_bids_dir( - sidecar_json.path, derivatives - ) + sidecar_json_path = self.copy_file_to_loris_bids_dir(sidecar_json.path, derivatives) + eeg_file_data['eegjson_file'] = str(sidecar_json_path) - eeg_file_data['eegjson_file'] = sidecar_json_path json_blake2 = compute_file_blake2b_hash(sidecar_json.path) eeg_file_data['physiological_json_file_blake2b_hash'] = json_blake2 @@ -325,12 +318,12 @@ def fetch_and_insert_eeg_files(self, derivatives=False, detect=True): print(f"ERROR: {error}") sys.exit(lib.exitcode.PROGRAM_EXECUTION_FAILURE) - if self.loris_bids_root_dir: + if self.info.loris_bids_path: # copy the scans.tsv file to the LORIS BIDS import directory scans_path = copy_scans_tsv_file_to_loris_bids_dir( self.scans_file, - self.bids_info.subject, - self.loris_bids_root_dir, + self.session, + self.info.data_dir_path / self.info.loris_bids_path, self.data_dir, ) @@ -342,13 +335,7 @@ def fetch_and_insert_eeg_files(self, derivatives=False, detect=True): # eeg_file_data dictionary fdt_file_path = None if file_type.name == 'set' and fdt_file: - fdt_file_path = os.path.relpath(fdt_file, self.data_dir) - if self.loris_bids_root_dir: - # copy the fdt file to the 
LORIS BIDS import directory - fdt_file_path = self.copy_file_to_loris_bids_dir( - fdt_file.path, derivatives - ) - + fdt_file_path = self.copy_file_to_loris_bids_dir(fdt_file.path, derivatives) eeg_file_data['fdt_file'] = fdt_file_path fdt_blake2 = compute_file_blake2b_hash(fdt_file.path) eeg_file_data['physiological_fdt_file_blake2b_hash'] = fdt_blake2 @@ -367,18 +354,17 @@ def fetch_and_insert_eeg_files(self, derivatives=False, detect=True): # grep the modality ID from physiological_modality table modality = get_check_bids_physio_modality(self.env, self.bids_info.data_type) - if self.loris_bids_root_dir: - # copy the eeg_file to the LORIS BIDS import directory - eeg_path = self.copy_file_to_loris_bids_dir( - eeg_file.path, derivatives - ) + # copy the eeg_file to the LORIS BIDS import directory + eeg_path = self.copy_file_to_loris_bids_dir( + eeg_file.path, derivatives + ) # insert the file along with its information into # physiological_file and physiological_parameter_file tables physio_file = insert_physio_file( self.env, self.session, - Path(eeg_path), + eeg_path, file_type, modality, output_type, @@ -388,7 +374,7 @@ def fetch_and_insert_eeg_files(self, derivatives=False, detect=True): insert_physio_file_parameters(self.env, physio_file, eeg_file_data) self.env.db.commit() - if self.loris_bids_root_dir: + if self.info.loris_bids_path: # If we copy the file in assembly_bids and # if the EEG file was a set file, then update the filename for the .set # and .fdt files in the .set file so it can find the proper file for @@ -457,12 +443,10 @@ def fetch_and_insert_electrode_file( ) if not result: electrode_data = utilities.read_tsv_file(electrode_file.path) - electrode_path = os.path.relpath(electrode_file.path, self.data_dir) - if self.loris_bids_root_dir: - # copy the electrode file to the LORIS BIDS import directory - electrode_path = self.copy_file_to_loris_bids_dir( - electrode_file.path, derivatives - ) + # copy the electrode file to the LORIS BIDS import 
directory + electrode_path = self.copy_file_to_loris_bids_dir( + electrode_file.path, derivatives + ) # get the blake2b hash of the electrode file blake2 = compute_file_blake2b_hash(electrode_file.path) @@ -497,12 +481,10 @@ def fetch_and_insert_electrode_file( electrode_ids ) else: - electrode_metadata_path = os.path.relpath(coordsystem_metadata_file, self.data_dir) - if self.loris_bids_root_dir: - # copy the electrode metadata file to the LORIS BIDS import directory - electrode_metadata_path = self.copy_file_to_loris_bids_dir( - coordsystem_metadata_file.path, derivatives - ) + # copy the electrode metadata file to the LORIS BIDS import directory + electrode_metadata_path = self.copy_file_to_loris_bids_dir( + coordsystem_metadata_file.path, derivatives + ) # load json data with open(coordsystem_metadata_file.path) as metadata_file: electrode_metadata = json.load(metadata_file) @@ -562,12 +544,10 @@ def fetch_and_insert_channel_file( if physiological_file.channels != []: return physiological_file.channels[0].file_path - channel_path = os.path.relpath(channels_file.path, self.data_dir) - if self.loris_bids_root_dir: - # copy the channel file to the LORIS BIDS import directory - channel_path = self.copy_file_to_loris_bids_dir( - channels_file.path, derivatives - ) + # copy the channel file to the LORIS BIDS import directory + channel_path = self.copy_file_to_loris_bids_dir( + channels_file.path, derivatives + ) # get the blake2b hash of the channel file blake2 = compute_file_blake2b_hash(channels_file.path) # insert the channel data in the database @@ -637,19 +617,16 @@ def fetch_and_insert_event_files( full_search = False, subject=self.bids_info.subject, ) - inheritance = False if not event_metadata_file: message = "WARNING: no events metadata files (events.json) associated " \ f"with physiological file ID {physiological_file.id}" print(message) else: - event_metadata_path = os.path.relpath(event_metadata_file.path, self.data_dir) - if self.loris_bids_root_dir: - # 
def copy_file_to_loris_bids_dir(self, file, derivatives=False):
    """
    Compute where a BIDS file belongs in the LORIS BIDS import directory, copy the
    file there and return its path in LORIS.

    The destination is determined by get_loris_bids_file_path (using the import
    information, the LORIS session and the BIDS data type of this object) and the
    copy itself is delegated to copy_loris_bids_file.

    :param file       : path of the file to copy (anything accepted by pathlib.Path)
    :type file        : str or Path
    :param derivatives: whether the file is part of a derivatives dataset
    :type derivatives : boolean

    :return: path of the file in LORIS, as returned by get_loris_bids_file_path
    :rtype: Path
    """

    # Convert once so that both helpers receive the very same Path object.
    source_path = Path(file)

    loris_file_path = get_loris_bids_file_path(
        self.info,
        self.session,
        self.bids_info.data_type,
        source_path,
        derivatives,
    )

    copy_loris_bids_file(self.info, source_path, loris_file_path)

    return loris_file_path
def get_loris_bids_file_path(
    import_env: BidsImportEnv,
    session: DbSession,
    data_type: str,
    file_path: Path,
    derivative: bool = False,
) -> Path:
    """
    Get the path of a BIDS file in LORIS, relative to the LORIS data directory.

    :param import_env: information about the BIDS import pipeline run
    :param session   : LORIS session the file belongs to; its candidate PSCID and visit
                       label are used to build the normalized path
    :param data_type : name of the data type directory placed under the session directory
                       (e.g. 'eeg')
    :param file_path : path of the source BIDS file
    :param derivative: whether the file is part of a derivatives dataset

    :return: the path of the file in LORIS, relative to the LORIS data directory
    :raises ValueError: in no-copy mode if the file is not located under the LORIS data
                        directory, or for a derivative if the file is not located under
                        the source BIDS directory
    """

    # If the import is run in no-copy mode, simply return the original file path,
    # expressed relative to the LORIS data directory.
    if import_env.loris_bids_path is None:
        return file_path.relative_to(import_env.data_dir_path)

    # If the file is a derivative, the path is unpredictable, so mirror the path it has in
    # the source BIDS dataset inside the LORIS BIDS dataset.
    if derivative:
        return import_env.loris_bids_path / file_path.relative_to(import_env.source_bids_path)

    # Otherwise, normalize the subject and session directory names using the LORIS session
    # information.
    loris_file_name = get_loris_bids_file_name(file_path.name, session)

    return (
        import_env.loris_bids_path
        / f'sub-{session.candidate.psc_id}'
        / f'ses-{session.visit_label}'
        / data_type
        / loris_file_name
    )


def get_loris_bids_file_name(file_name: str, session: DbSession) -> str:
    """
    Get the name of a BIDS file in LORIS, replacing or adding the BIDS subject and session
    labels with the LORIS PSCID and visit label.

    :param file_name: original BIDS file name
    :param session  : LORIS session providing the PSCID and the visit label

    :return: the file name prefixed with 'sub-<PSCID>_ses-<visit label>_'
    """

    # Remove the subject and session entities if they are present. The negative lookbehind
    # makes sure only whole 'sub-' / 'ses-' entities are removed, and not the tail of a
    # longer token that merely ends with 'sub' or 'ses'.
    file_name = re.sub(r'(?<![a-zA-Z0-9])sub-[a-zA-Z0-9]+_?', '', file_name)
    file_name = re.sub(r'(?<![a-zA-Z0-9])ses-[a-zA-Z0-9]+_?', '', file_name)

    # Add the LORIS subject and session information back in the correct order.
    return f'sub-{session.candidate.psc_id}_ses-{session.visit_label}_{file_name}'


def copy_loris_bids_file(import_env: BidsImportEnv, file_path: Path, loris_file_path: Path) -> None:
    """
    Copy a BIDS file to the LORIS data directory, unless the no-copy mode is enabled.

    :param import_env     : information about the BIDS import pipeline run
    :param file_path      : path of the source file or directory to copy
    :param loris_file_path: destination path, relative to the LORIS data directory

    :raises FileExistsError  : if the destination already exists in LORIS
    :raises FileNotFoundError: if the source is neither an existing file nor a directory
    """

    # Do not copy the file in no-copy mode.
    if import_env.loris_bids_path is None:
        return

    full_loris_file_path = import_env.data_dir_path / loris_file_path

    if full_loris_file_path.exists():
        raise FileExistsError(f"File '{loris_file_path}' already exists in LORIS.")

    # Select the copy routine before touching the file system, so that a missing source
    # fails loudly instead of leaving empty directories and an uncopied file behind.
    if file_path.is_file():
        copy = shutil.copyfile
    elif file_path.is_dir():
        copy = shutil.copytree
    else:
        raise FileNotFoundError(f"File '{file_path}' does not exist or is not a file or directory.")

    full_loris_file_path.parent.mkdir(parents=True, exist_ok=True)
    copy(file_path, full_loris_file_path)
+ """ + + source_bids_path: Path + """ + The source BIDS directory path. + """ + + loris_bids_path: Path | None + """ + The LORIS BIDS directory path for this import, relative to the LORIS data directory. + """ diff --git a/python/scripts/bids_import.py b/python/scripts/bids_import.py index 840cf3e79..68c0a21a2 100755 --- a/python/scripts/bids_import.py +++ b/python/scripts/bids_import.py @@ -27,6 +27,7 @@ from lib.env import Env from lib.import_bids_dataset.check_sessions import check_or_create_bids_sessions from lib.import_bids_dataset.check_subjects import check_or_create_bids_subjects +from lib.import_bids_dataset.env import BidsImportEnv from lib.make_env import make_env from lib.mri import Mri @@ -291,6 +292,12 @@ def read_and_insert_bids( hed_union=hed_union ) + import_env = BidsImportEnv( + data_dir_path = Path(data_dir), + source_bids_path = Path(bids_dir), + loris_bids_path = Path(loris_bids_root_dir).relative_to(data_dir) if loris_bids_root_dir is not None else None, + ) + # read list of modalities per session / candidate and register data for data_type_reader in bids_reader.data_types: bids_info = data_type_reader.info @@ -314,15 +321,13 @@ def read_and_insert_bids( case 'eeg' | 'ieeg': Eeg( env, - bids_layout = bids_reader.layout, - session = session, - bids_info = bids_info, - db = db, - data_dir = data_dir, - loris_bids_eeg_rel_dir = loris_bids_data_type_rel_dir, - loris_bids_root_dir = loris_bids_root_dir, - dataset_tag_dict = dataset_tag_dict, - dataset_type = type + import_env, + bids_layout = bids_reader.layout, + session = session, + bids_info = bids_info, + db = db, + dataset_tag_dict = dataset_tag_dict, + dataset_type = type ) case 'anat' | 'dwi' | 'fmap' | 'func': Mri(