diff --git a/python/lib/config.py b/python/lib/config.py index b87bb7b16..60430a284 100644 --- a/python/lib/config.py +++ b/python/lib/config.py @@ -34,22 +34,7 @@ def get_data_dir_path_config(env: Env) -> Path: """ data_dir_path = Path(_get_config_value(env, 'dataDirBasepath')) - - if not data_dir_path.is_dir(): - log_error_exit( - env, - ( - f"The LORIS base data directory path configuration value '{data_dir_path}' does not refer to an" - " existing directory." - ) - ) - - if not os.access(data_dir_path, os.R_OK) or not os.access(data_dir_path, os.W_OK): - log_error_exit( - env, - f"Missing read or write permission on the LORIS base data directory '{data_dir_path}'.", - ) - + check_loris_directory(env, data_dir_path, "data") return data_dir_path @@ -60,22 +45,7 @@ def get_dicom_archive_dir_path_config(env: Env) -> Path: """ dicom_archive_dir_path = Path(_get_config_value(env, 'tarchiveLibraryDir')) - - if not dicom_archive_dir_path.is_dir(): - log_error_exit( - env, - ( - f"The LORIS DICOM archive directory path configuration value '{dicom_archive_dir_path}' does not refer" - " to an existing directory." - ), - ) - - if not os.access(dicom_archive_dir_path, os.R_OK) or not os.access(dicom_archive_dir_path, os.W_OK): - log_error_exit( - env, - f"Missing read or write permission on the LORIS DICOM archive directory '{dicom_archive_dir_path}'.", - ) - + check_loris_directory(env, dicom_archive_dir_path, "DICOM archive") return dicom_archive_dir_path @@ -87,75 +57,66 @@ def get_default_bids_visit_label_config(env: Env) -> str | None: return _try_get_config_value(env, 'default_bids_vl') -def get_eeg_viz_enabled_config(env: Env) -> bool: +def get_ephys_visualization_enabled_config(env: Env) -> bool: """ - Get whether the EEG visualization is enabled from the in-database configuration. + Get whether the electrophysiology visualization is enabled from the in-database configuration. 
""" - eeg_viz_enabled = _try_get_config_value(env, 'useEEGBrowserVisualizationComponents') - return eeg_viz_enabled == 'true' or eeg_viz_enabled == '1' + visualization_enabled = _try_get_config_value(env, 'useEEGBrowserVisualizationComponents') + return visualization_enabled == 'true' or visualization_enabled == '1' -def get_eeg_chunks_dir_path_config(env: Env) -> Path | None: +def get_ephys_chunks_dir_path_config(env: Env) -> Path | None: """ - Get the EEG chunks directory path configuration value from the in-database configuration. + Get the electrophysiology chunks directory path configuration value from the in-database + configuration. """ - eeg_chunks_path = _try_get_config_value(env, 'EEGChunksPath') - if eeg_chunks_path is None: + ephys_chunks_path = _try_get_config_value(env, 'EEGChunksPath') + if ephys_chunks_path is None: return None - eeg_chunks_path = Path(eeg_chunks_path) + ephys_chunks_path = Path(ephys_chunks_path) + check_loris_directory(env, ephys_chunks_path, "electrophysiology chunks") + return ephys_chunks_path - if not eeg_chunks_path.is_dir(): - log_error_exit( - env, - ( - f"The configuration value for the LORIS EEG chunks directory path '{eeg_chunks_path}' does not refer to" - " an existing directory." - ), - ) - - if not os.access(eeg_chunks_path, os.R_OK) or not os.access(eeg_chunks_path, os.W_OK): - log_error_exit( - env, - f"Missing read or write permission on the LORIS EEG chunks directory '{eeg_chunks_path}'.", - ) - return eeg_chunks_path - - -def get_eeg_pre_package_download_dir_path_config(env: Env) -> Path | None: +def get_ephys_archive_dir_path_config(env: Env) -> Path | None: """ - Get the EEG pre-packaged download path configuration value from the in-database configuration. + Get the electrophysiology archive directory path configuration value from the in-database + configuration. 
""" - eeg_pre_package_path = _try_get_config_value(env, 'prePackagedDownloadPath') - if eeg_pre_package_path is None: + ephys_archive_dir_path = _try_get_config_value(env, 'prePackagedDownloadPath') + if ephys_archive_dir_path is None: return None - eeg_pre_package_path = Path(eeg_pre_package_path) + ephys_archive_dir_path = Path(ephys_archive_dir_path) + check_loris_directory(env, ephys_archive_dir_path, "electrophysiology archive") + return ephys_archive_dir_path + + +def check_loris_directory(env: Env, dir_path: Path, display_name: str): + """ + Check that a LORIS directory exists and is readable and writable, or exit the program with an + error otherwise. + """ - if not eeg_pre_package_path.is_dir(): + if not dir_path.is_dir(): log_error_exit( env, ( - "The configuration value for the LORIS EEG pre-packaged download directory path" - f" '{eeg_pre_package_path}' does not refer to an existing directory." + f"The LORIS {display_name} directory path configuration value '{dir_path}' does not refer to an" + " existing directory." ), ) - if not os.access(eeg_pre_package_path, os.R_OK) or not os.access(eeg_pre_package_path, os.W_OK): + if not os.access(dir_path, os.R_OK) or not os.access(dir_path, os.W_OK): log_error_exit( env, - ( - "Missing read or write permission on the LORIS EEG pre-packaged download directory" - f" '{eeg_pre_package_path}'." 
- ), + f"Missing read or write permission on the {display_name} directory '{dir_path}'.", ) - return eeg_pre_package_path - def _get_config_value(env: Env, setting_name: str) -> str: """ diff --git a/python/lib/database_lib/physiological_event_archive.py b/python/lib/database_lib/physiological_event_archive.py index ae6ed5b9e..218302831 100644 --- a/python/lib/database_lib/physiological_event_archive.py +++ b/python/lib/database_lib/physiological_event_archive.py @@ -1,6 +1,9 @@ """This class performs database queries for the physiological_event_archive table""" +from typing_extensions import deprecated + +@deprecated('Use `lib.db.physio_event_archive.DbPhysioEventArchive` instead') class PhysiologicalEventArchive: def __init__(self, db, verbose): @@ -17,6 +20,7 @@ def __init__(self, db, verbose): self.table = 'physiological_event_archive' self.verbose = verbose + @deprecated('Use `lib.db.physio_event_archive.DbPhysioEventArchive.physio_file_id` instead') def grep_from_physiological_file_id(self, physiological_file_id): """ Gets rows given a physiological_file_id @@ -33,6 +37,7 @@ def grep_from_physiological_file_id(self, physiological_file_id): args=(physiological_file_id,) ) + @deprecated('Use `lib.db.physio_event_archive.DbPhysioEventArchive` instead') def insert(self, physiological_file_id, blake2, archive_path): """ Inserts a new entry in the physiological_event_archive table. 
diff --git a/python/lib/db/models/physio_event_archive.py b/python/lib/db/models/physio_event_archive.py index 38635b834..7b9bed8e9 100644 --- a/python/lib/db/models/physio_event_archive.py +++ b/python/lib/db/models/physio_event_archive.py @@ -14,6 +14,6 @@ class DbPhysioEventArchive(Base): id : Mapped[int] = mapped_column('EventArchiveID', primary_key=True) physio_file_id : Mapped[int] = mapped_column('PhysiologicalFileID', ForeignKey('physiological_file.PhysiologicalFileID')) blake2b_hash : Mapped[str] = mapped_column('Blake2bHash') - file_path : Mapped[Path] = mapped_column('FilePath', StringPath) + path : Mapped[Path] = mapped_column('FilePath', StringPath) physio_file: Mapped['db_physio_file.DbPhysioFile'] = relationship('DbPhysioFile') diff --git a/python/lib/db/models/physio_file_archive.py b/python/lib/db/models/physio_file_archive.py index b8f32ad10..2004cdfd0 100644 --- a/python/lib/db/models/physio_file_archive.py +++ b/python/lib/db/models/physio_file_archive.py @@ -16,6 +16,6 @@ class DbPhysioFileArchive(Base): physio_file_id : Mapped[int] = mapped_column('PhysiologicalFileID', ForeignKey('physiological_file.PhysiologicalFileID')) insert_time : Mapped[datetime] = mapped_column('InsertTime', default=datetime.now) blake2b_hash : Mapped[str] = mapped_column('Blake2bHash') - file_path : Mapped[Path] = mapped_column('FilePath', StringPath) + path : Mapped[Path] = mapped_column('FilePath', StringPath) physio_file: Mapped['db_physio_file.DbPhysioFile'] = relationship('DbPhysioFile') diff --git a/python/lib/eeg.py b/python/lib/eeg.py index 2f32cc80f..48fe311ea 100644 --- a/python/lib/eeg.py +++ b/python/lib/eeg.py @@ -14,13 +14,18 @@ import lib.exitcode import lib.utilities as utilities -from lib.config import get_eeg_pre_package_download_dir_path_config, get_eeg_viz_enabled_config -from lib.database_lib.physiological_event_archive import PhysiologicalEventArchive +from lib.config import get_ephys_visualization_enabled_config from lib.db.models.physio_file 
import DbPhysioFile from lib.db.models.session import DbSession from lib.db.queries.physio_file import try_get_physio_file_with_path from lib.env import Env -from lib.import_bids_dataset.copy_files import copy_scans_tsv_file_to_loris_bids_dir +from lib.import_bids_dataset.archive import import_physio_event_archive, import_physio_file_archive +from lib.import_bids_dataset.copy_files import ( + copy_loris_bids_file, + copy_scans_tsv_file_to_loris_bids_dir, + get_loris_bids_file_path, +) +from lib.import_bids_dataset.env import BidsImportEnv from lib.import_bids_dataset.file_type import get_check_bids_imaging_file_type_from_extension from lib.import_bids_dataset.physio import ( get_check_bids_physio_file_hash, @@ -40,9 +45,8 @@ class Eeg: into the database by calling the lib.physiological class. """ - def __init__(self, env: Env, bids_layout, bids_info: BidsDataTypeInfo, session: DbSession, db, - data_dir, loris_bids_eeg_rel_dir, - loris_bids_root_dir, dataset_tag_dict, dataset_type): + def __init__(self, env: Env, import_env: BidsImportEnv, bids_layout, bids_info: BidsDataTypeInfo, + session: DbSession, db, dataset_tag_dict, dataset_type): """ Constructor method for the Eeg class. @@ -52,12 +56,7 @@ def __init__(self, env: Env, bids_layout, bids_info: BidsDataTypeInfo, session: :param session : The LORIS session the EEG datasets are linked to :param db : Database class object :type db : object - :param data_dir : LORIS data directory path (usually /data/PROJECT/data) - :type data_dir : str - :param loris_bids_eeg_rel_dir: LORIS BIDS EEG relative dir path to data_dir - :type loris_bids_eeg_rel_dir: str - :param loris_bids_root_dir : LORIS BIDS root directory path - :type loris_bids_root_dir : str + :param import_env : The BIDS import pipeline information :param dataset_tag_dict : Dict of dataset-inherited HED tags :type dataset_tag_dict : dict :param dataset_type : raw | derivative. 
Type of the dataset @@ -71,9 +70,8 @@ def __init__(self, env: Env, bids_layout, bids_info: BidsDataTypeInfo, session: # load the LORIS BIDS import root directory where the eeg files will # be copied - self.loris_bids_eeg_rel_dir = loris_bids_eeg_rel_dir - self.loris_bids_root_dir = loris_bids_root_dir - self.data_dir = data_dir + self.info = import_env + self.data_dir = self.info.data_dir_path # load bids subject, visit and modality self.bids_info = bids_info @@ -192,37 +190,30 @@ def register_data(self, derivatives=False, detect=True): ) # archive all files in a tar ball for downloading all files at once - files_to_archive: list[str] = [os.path.join(self.data_dir, eeg_file.path)] + files_to_archive: list[Path] = [self.data_dir / eeg_file.path] if eegjson_file_path: - files_to_archive.append(os.path.join(self.data_dir, eegjson_file_path)) + files_to_archive.append(self.data_dir / eegjson_file_path) + if channel_file_path: + files_to_archive.append(self.data_dir / channel_file_path) if fdt_file_path: - files_to_archive.append(os.path.join(self.data_dir, fdt_file_path)) + files_to_archive.append(self.data_dir / fdt_file_path) if electrode_file_path: - files_to_archive.append(os.path.join(self.data_dir, electrode_file_path)) + files_to_archive.append(self.data_dir / electrode_file_path) if event_file_paths: # archive all event files in a tar ball for event download - event_files_to_archive: list[str] = [] + event_files_to_archive: list[Path] = [] for event_file_path in event_file_paths: - files_to_archive.append(os.path.join(self.data_dir, event_file_path)) - event_files_to_archive.append(os.path.join(self.data_dir, event_file_path)) - - event_archive_rel_name = os.path.splitext(event_file_paths[0])[0] + ".tgz" - self.create_and_insert_event_archive( - event_files_to_archive, event_archive_rel_name, eeg_file - ) + files_to_archive.append(self.data_dir / event_file_path) + event_files_to_archive.append(self.data_dir / event_file_path) - if channel_file_path: - 
files_to_archive.append(os.path.join(self.data_dir, channel_file_path)) + import_physio_event_archive(self.env, eeg_file, event_files_to_archive) - archive_rel_name = os.path.splitext(eeg_file.path)[0] + ".tgz" - self.create_and_insert_archive( - files_to_archive, archive_rel_name, eeg_file - ) + import_physio_file_archive(self.env, eeg_file, files_to_archive) # create data chunks for React visualization - if get_eeg_viz_enabled_config(self.env): + if get_ephys_visualization_enabled_config(self.env): create_physio_channels_chunks(self.env, eeg_file) def fetch_and_insert_eeg_files(self, derivatives=False, detect=True): @@ -294,14 +285,9 @@ def fetch_and_insert_eeg_files(self, derivatives=False, detect=True): if sidecar_json is not None: eeg_file_data = sidecar_json.data - sidecar_json_path = os.path.relpath(sidecar_json.path, self.data_dir) - if self.loris_bids_root_dir: - # copy the JSON file to the LORIS BIDS import directory - sidecar_json_path = self.copy_file_to_loris_bids_dir( - sidecar_json.path, derivatives - ) + sidecar_json_path = self.copy_file_to_loris_bids_dir(sidecar_json.path, derivatives) + eeg_file_data['eegjson_file'] = str(sidecar_json_path) - eeg_file_data['eegjson_file'] = sidecar_json_path json_blake2 = compute_file_blake2b_hash(sidecar_json.path) eeg_file_data['physiological_json_file_blake2b_hash'] = json_blake2 @@ -325,12 +311,12 @@ def fetch_and_insert_eeg_files(self, derivatives=False, detect=True): print(f"ERROR: {error}") sys.exit(lib.exitcode.PROGRAM_EXECUTION_FAILURE) - if self.loris_bids_root_dir: + if self.info.loris_bids_path: # copy the scans.tsv file to the LORIS BIDS import directory scans_path = copy_scans_tsv_file_to_loris_bids_dir( self.scans_file, - self.bids_info.subject, - self.loris_bids_root_dir, + self.session, + self.info.data_dir_path / self.info.loris_bids_path, self.data_dir, ) @@ -342,13 +328,7 @@ def fetch_and_insert_eeg_files(self, derivatives=False, detect=True): # eeg_file_data dictionary fdt_file_path = None if 
file_type.name == 'set' and fdt_file: - fdt_file_path = os.path.relpath(fdt_file, self.data_dir) - if self.loris_bids_root_dir: - # copy the fdt file to the LORIS BIDS import directory - fdt_file_path = self.copy_file_to_loris_bids_dir( - fdt_file.path, derivatives - ) - + fdt_file_path = self.copy_file_to_loris_bids_dir(fdt_file.path, derivatives) eeg_file_data['fdt_file'] = fdt_file_path fdt_blake2 = compute_file_blake2b_hash(fdt_file.path) eeg_file_data['physiological_fdt_file_blake2b_hash'] = fdt_blake2 @@ -367,18 +347,17 @@ def fetch_and_insert_eeg_files(self, derivatives=False, detect=True): # grep the modality ID from physiological_modality table modality = get_check_bids_physio_modality(self.env, self.bids_info.data_type) - if self.loris_bids_root_dir: - # copy the eeg_file to the LORIS BIDS import directory - eeg_path = self.copy_file_to_loris_bids_dir( - eeg_file.path, derivatives - ) + # copy the eeg_file to the LORIS BIDS import directory + eeg_path = self.copy_file_to_loris_bids_dir( + eeg_file.path, derivatives + ) # insert the file along with its information into # physiological_file and physiological_parameter_file tables physio_file = insert_physio_file( self.env, self.session, - Path(eeg_path), + eeg_path, file_type, modality, output_type, @@ -388,7 +367,7 @@ def fetch_and_insert_eeg_files(self, derivatives=False, detect=True): insert_physio_file_parameters(self.env, physio_file, eeg_file_data) self.env.db.commit() - if self.loris_bids_root_dir: + if self.info.loris_bids_path: # If we copy the file in assembly_bids and # if the EEG file was a set file, then update the filename for the .set # and .fdt files in the .set file so it can find the proper file for @@ -457,12 +436,10 @@ def fetch_and_insert_electrode_file( ) if not result: electrode_data = utilities.read_tsv_file(electrode_file.path) - electrode_path = os.path.relpath(electrode_file.path, self.data_dir) - if self.loris_bids_root_dir: - # copy the electrode file to the LORIS BIDS import 
directory - electrode_path = self.copy_file_to_loris_bids_dir( - electrode_file.path, derivatives - ) + # copy the electrode file to the LORIS BIDS import directory + electrode_path = self.copy_file_to_loris_bids_dir( + electrode_file.path, derivatives + ) # get the blake2b hash of the electrode file blake2 = compute_file_blake2b_hash(electrode_file.path) @@ -497,12 +474,10 @@ def fetch_and_insert_electrode_file( electrode_ids ) else: - electrode_metadata_path = os.path.relpath(coordsystem_metadata_file, self.data_dir) - if self.loris_bids_root_dir: - # copy the electrode metadata file to the LORIS BIDS import directory - electrode_metadata_path = self.copy_file_to_loris_bids_dir( - coordsystem_metadata_file.path, derivatives - ) + # copy the electrode metadata file to the LORIS BIDS import directory + electrode_metadata_path = self.copy_file_to_loris_bids_dir( + coordsystem_metadata_file.path, derivatives + ) # load json data with open(coordsystem_metadata_file.path) as metadata_file: electrode_metadata = json.load(metadata_file) @@ -562,12 +537,10 @@ def fetch_and_insert_channel_file( if physiological_file.channels != []: return physiological_file.channels[0].file_path - channel_path = os.path.relpath(channels_file.path, self.data_dir) - if self.loris_bids_root_dir: - # copy the channel file to the LORIS BIDS import directory - channel_path = self.copy_file_to_loris_bids_dir( - channels_file.path, derivatives - ) + # copy the channel file to the LORIS BIDS import directory + channel_path = self.copy_file_to_loris_bids_dir( + channels_file.path, derivatives + ) # get the blake2b hash of the channel file blake2 = compute_file_blake2b_hash(channels_file.path) # insert the channel data in the database @@ -637,19 +610,16 @@ def fetch_and_insert_event_files( full_search = False, subject=self.bids_info.subject, ) - inheritance = False if not event_metadata_file: message = "WARNING: no events metadata files (events.json) associated " \ f"with physiological file ID 
{physiological_file.id}" print(message) else: - event_metadata_path = os.path.relpath(event_metadata_file.path, self.data_dir) - if self.loris_bids_root_dir: - # copy the event file to the LORIS BIDS import directory - event_metadata_path = self.copy_file_to_loris_bids_dir( - event_metadata_file.path, derivatives, inheritance - ) + # copy the event file to the LORIS BIDS import directory + event_metadata_path = self.copy_file_to_loris_bids_dir( + event_metadata_file.path, derivatives + ) # load json data with open(event_metadata_file.path) as metadata_file: event_metadata = json.load(metadata_file) @@ -658,7 +628,7 @@ def fetch_and_insert_event_files( # insert event metadata in the database _, file_tag_dict = physiological.insert_event_metadata( event_metadata=event_metadata, - event_metadata_file=event_metadata_path, + event_metadata_file=str(event_metadata_path), physiological_file=physiological_file, project_id=self.session.project.id, blake2=blake2, @@ -668,19 +638,17 @@ def fetch_and_insert_event_files( event_paths.extend([event_metadata_path]) # get events.tsv file and insert - event_path = os.path.relpath(events_data_file.path, self.data_dir) - if self.loris_bids_root_dir: - # copy the event file to the LORIS BIDS import directory - event_path = self.copy_file_to_loris_bids_dir( - events_data_file.path, derivatives - ) + # copy the event file to the LORIS BIDS import directory + event_path = self.copy_file_to_loris_bids_dir( + events_data_file.path, derivatives + ) # get the blake2b hash of the task events file blake2 = compute_file_blake2b_hash(events_data_file.path) # insert event data in the database physiological.insert_event_file( events_file=events_data_file, - event_file=event_path, + event_file=str(event_path), physiological_file=physiological_file, project_id=self.session.project.id, blake2=blake2, @@ -693,7 +661,7 @@ def fetch_and_insert_event_files( return event_paths - def copy_file_to_loris_bids_dir(self, file, derivatives=False, 
inheritance=False): + def copy_file_to_loris_bids_dir(self, file, derivatives=False): """ Wrapper around the utilities.copy_file function that copies the file to the LORIS BIDS import directory and returns the relative path of the @@ -706,167 +674,16 @@ def copy_file_to_loris_bids_dir(self, file, derivatives=False, inheritance=False :type derivatives: boolean :return: relative path to the copied file - :rtype: str + :rtype: Path """ - # Handle derivatives differently - # Data path structure is unpredictable, so keep the same relative path - if derivatives: - copy_file = os.path.relpath(file, self.bids_layout.root) - copy_file = os.path.join(self.loris_bids_root_dir, copy_file) - else : - # determine the path of the copied file - copy_file = "" - if not inheritance: - copy_file = self.loris_bids_eeg_rel_dir - if self.bids_info.session is not None: - copy_file = os.path.join(copy_file, os.path.basename(file)) - else: - # make sure the ses- is included in the new filename if using - # default visit label from the LORIS config - copy_file = os.path.join( - copy_file, - os.path.basename(file).replace( - f'sub-{self.bids_info.subject}', - f'sub-{self.bids_info.subject}_ses-{self.session.visit_label}' - ) - ) - - copy_file = os.path.join(self.loris_bids_root_dir, copy_file) - - # create the directory if it does not exist - lib.utilities.create_dir( - os.path.dirname(copy_file), - self.env.verbose + loris_file_path = get_loris_bids_file_path( + self.info, + self.session, + self.bids_info.data_type, + Path(file), + derivatives, ) - # copy the file - utilities.copy_file(file, copy_file, self.env.verbose) - - # determine the relative path and return it - relative_path = os.path.relpath(copy_file, self.data_dir) - - return relative_path - - def create_and_insert_archive(self, files_to_archive: list[str], archive_rel_name: str, eeg_file: DbPhysioFile): - """ - Create an archive with all electrophysiology files associated to a - specific recording (including electrodes.tsv, 
channels.tsv etc...) - :param files_to_archive: list of files to include in the archive - :param archive_rel_name: path to the archive relative to data_dir - :param eeg_file_id : PhysiologicalFileID - """ - - # load the Physiological object that will be used to insert the - # physiological archive into the database - physiological = Physiological(self.env, self.db, self.env.verbose) - - # check if archive is on the filesystem - (archive_rel_name, archive_full_path) = self.get_archive_paths(archive_rel_name) - if os.path.isfile(archive_full_path): - blake2 = compute_file_blake2b_hash(archive_full_path) - else: - blake2 = None - - # check if archive already inserted in database and matches the one - # on the filesystem using blake2b hash - if eeg_file.archive is not None: - if not blake2: - message = 'ERROR: no archive was found on the filesystem ' + \ - 'while an entry was found in the database for ' + \ - f'PhysiologicalFileID = {eeg_file.id}' - print(message) - exit(lib.exitcode.MISSING_FILES) - elif eeg_file.archive.blake2b_hash != blake2: - message = '\nERROR: blake2b hash of ' + archive_full_path +\ - ' does not match the one stored in the database.' 
+\ - '\nblake2b of ' + archive_full_path + ': ' + blake2 +\ - '\nblake2b in the database: ' + eeg_file.archive.blake2b_hash - print(message) - exit(lib.exitcode.CORRUPTED_FILE) - else: - return - - # create the archive directory if it does not exist - lib.utilities.create_dir( - os.path.dirname(archive_full_path), - self.env.verbose - ) - - # create the archive file - utilities.create_archive(files_to_archive, archive_full_path) - - # insert the archive file in physiological_archive - blake2 = compute_file_blake2b_hash(archive_full_path) - archive_info = { - 'PhysiologicalFileID': eeg_file.id, - 'Blake2bHash' : blake2, - 'FilePath' : archive_rel_name - } - physiological.insert_archive_file(archive_info) - - def create_and_insert_event_archive( - self, - files_to_archive: list[str], - archive_rel_name: str, - eeg_file: DbPhysioFile, - ): - """ - Create an archive with all event files associated to a specific recording - :param files_to_archive: list of files to include in the archive - :param archive_rel_name: path to the archive relative to data_dir - :param eeg_file : Physiological file object - """ - - # check if archive is on the filesystem - (archive_rel_name, archive_full_path) = self.get_archive_paths(archive_rel_name) - if os.path.isfile(archive_full_path): - blake2 = compute_file_blake2b_hash(archive_full_path) - else: - blake2 = None - - # check if archive already inserted in database and matches the one - # on the filesystem using blake2b hash - physiological_event_archive_obj = PhysiologicalEventArchive(self.db, self.env.verbose) - - if eeg_file.event_archive is not None: - if not blake2: - message = '\nERROR: no archive was found on the filesystem ' + \ - 'while an entry was found in the database for ' + \ - 'PhysiologicalFileID = ' + str(eeg_file.id) - print(message) - exit(lib.exitcode.MISSING_FILES) - elif eeg_file.event_archive.blake2b_hash != blake2: - message = '\nERROR: blake2b hash of ' + archive_full_path +\ - ' does not match the one stored in 
the database.' +\ - '\nblake2b of ' + archive_full_path + ': ' + blake2 +\ - '\nblake2b in the database: ' + eeg_file.event_archive.blake2b_hash - print(message) - exit(lib.exitcode.CORRUPTED_FILE) - else: - return - - # create the archive directory if it does not exist - lib.utilities.create_dir( - os.path.dirname(archive_full_path), - self.env.verbose - ) - - # create the archive file - utilities.create_archive(files_to_archive, archive_full_path) - - # insert the archive into the physiological_annotation_archive table - blake2 = compute_file_blake2b_hash(archive_full_path) - physiological_event_archive_obj.insert(eeg_file.id, blake2, archive_rel_name) - - def get_archive_paths(self, archive_rel_name): - package_path = get_eeg_pre_package_download_dir_path_config(self.env) - if package_path: - raw_package_dir = os.path.join(package_path, 'raw') - os.makedirs(raw_package_dir, exist_ok=True) - archive_rel_name = os.path.basename(archive_rel_name) - archive_full_path = os.path.join(raw_package_dir, archive_rel_name) - else: - archive_full_path = os.path.join(self.data_dir, archive_rel_name) - - return (archive_rel_name, archive_full_path) + copy_loris_bids_file(self.info, Path(file), loris_file_path) + return loris_file_path diff --git a/python/lib/imaging_lib/scan_type.py b/python/lib/imaging_lib/scan_type.py new file mode 100644 index 000000000..dfa552600 --- /dev/null +++ b/python/lib/imaging_lib/scan_type.py @@ -0,0 +1,17 @@ +from lib.db.models.mri_scan_type import DbMriScanType +from lib.env import Env + + +def create_mri_scan_type(env: Env, name: str) -> DbMriScanType: + """ + Create an MRI scan type in the database. 
+ """ + + scan_type = DbMriScanType( + name = name, + ) + + env.db.add(scan_type) + env.db.flush() + + return scan_type diff --git a/python/lib/import_bids_dataset/acquisitions.py b/python/lib/import_bids_dataset/acquisitions.py new file mode 100644 index 000000000..8d81828da --- /dev/null +++ b/python/lib/import_bids_dataset/acquisitions.py @@ -0,0 +1,43 @@ +from collections.abc import Callable +from typing import TypeVar + +from loris_bids_reader.info import BidsAcquisitionInfo + +from lib.env import Env +from lib.import_bids_dataset.env import BidsImportEnv +from lib.logging import log, log_error + +T = TypeVar('T') + + +def import_bids_acquisitions( + env: Env, + import_env: BidsImportEnv, + acquisitions: list[tuple[T, BidsAcquisitionInfo]], + importer: Callable[[T, BidsAcquisitionInfo], None] +): + """ + Run an import function on a list of BIDS acquisitions, logging the overall import progress, + and catching the eventual exceptions raised during each import. + """ + + for acquisition, bids_info in acquisitions: + log( + env, + f"Importing {bids_info.data_type} acquisition '{bids_info.name}'...", + ) + + try: + importer(acquisition, bids_info) + log(env, f"Successfully imported acquisition '{bids_info.name}'.") + import_env.imported_acquisitions_count += 1 + except Exception as exception: + log_error( + env, + ( + f"Error while importing acquisition '{bids_info.name}'. Error message:\n" + f"{exception}\n" + "Skipping." 
+ ) + ) + import_env.failed_acquisitions_count += 1 diff --git a/python/lib/import_bids_dataset/archive.py b/python/lib/import_bids_dataset/archive.py new file mode 100644 index 000000000..84808d2cc --- /dev/null +++ b/python/lib/import_bids_dataset/archive.py @@ -0,0 +1,78 @@ +from pathlib import Path + +from loris_utils.archive import create_archive_with_files +from loris_utils.crypto import compute_file_blake2b_hash +from loris_utils.path import remove_path_extension + +from lib.config import get_data_dir_path_config, get_ephys_archive_dir_path_config +from lib.db.models.physio_event_archive import DbPhysioEventArchive +from lib.db.models.physio_file import DbPhysioFile +from lib.db.models.physio_file_archive import DbPhysioFileArchive +from lib.env import Env + + +def import_physio_file_archive(env: Env, physio_file: DbPhysioFile, file_paths: list[Path]): + """ + Create and import a physiological file archive into LORIS. + """ + + archive_rel_path = get_archive_path(env, physio_file.path) + + data_dir_path = get_data_dir_path_config(env) + archive_path = data_dir_path / archive_rel_path + if archive_path.exists(): + raise Exception(f"Archive '{archive_rel_path}' already exists on the file system.") + + archive_path.parent.mkdir(exist_ok=True) + + create_archive_with_files(archive_path, file_paths) + + blake2b_hash = compute_file_blake2b_hash(archive_path) + + env.db.add(DbPhysioFileArchive( + physio_file_id = physio_file.id, + path = archive_rel_path, + blake2b_hash = blake2b_hash, + )) + + env.db.flush() + + +def import_physio_event_archive(env: Env, physio_file: DbPhysioFile, file_paths: list[Path]): + """ + Create and import a physiological event archive into LORIS. The name of the archive is based on + the first file path provided. 
+ """ + + data_dir_path = get_data_dir_path_config(env) + archive_rel_path = remove_path_extension(file_paths[0].relative_to(data_dir_path)).with_suffix('.tgz') + + archive_path = data_dir_path / archive_rel_path + if archive_path.exists(): + raise Exception(f"Event archive '{archive_rel_path}' already exists on the file system.") + + create_archive_with_files(archive_path, file_paths) + + blake2b_hash = compute_file_blake2b_hash(archive_path) + + env.db.add(DbPhysioEventArchive( + physio_file_id = physio_file.id, + path = archive_rel_path, + blake2b_hash = blake2b_hash, + )) + + env.db.flush() + + +def get_archive_path(env: Env, file_path: Path) -> Path: + """ + Get the path of a physiological file archive relative to the LORIS data directory. + """ + + archive_rel_path = remove_path_extension(file_path).with_suffix('.tgz') + archive_dir_path = get_ephys_archive_dir_path_config(env) + if archive_dir_path is not None: + data_dir_path = get_data_dir_path_config(env) + return (archive_dir_path / 'raw' / archive_rel_path.name).relative_to(data_dir_path) + else: + return archive_rel_path diff --git a/python/lib/import_bids_dataset/copy_files.py b/python/lib/import_bids_dataset/copy_files.py index 4df03ddd3..ae68ef9c8 100644 --- a/python/lib/import_bids_dataset/copy_files.py +++ b/python/lib/import_bids_dataset/copy_files.py @@ -1,23 +1,102 @@ - import os +import re +import shutil +from pathlib import Path from loris_bids_reader.files.scans import BidsScansTsvFile import lib.utilities +from lib.db.models.session import DbSession +from lib.import_bids_dataset.env import BidsImportEnv + + +def get_loris_bids_file_path( + import_env: BidsImportEnv, + session: DbSession, + data_type: str, + file_path: Path, + derivative: bool = False, +) -> Path: + """ + Get the path of a BIDS file in LORIS, relative to the LORIS data directory. + """ + + # In the import is run in no-copy mode, simply return the original file path. 
+ if import_env.loris_bids_path is None: + return file_path.relative_to(import_env.data_dir_path) + + # If the file is a derivative, the path is unpredictable, so return a copy of that path in the + # LORIS BIDS dataset. + if derivative: + return import_env.loris_bids_path / file_path.relative_to(import_env.source_bids_path) + + # Otherwise, normalize the subject and session directory names using the LORIS session + # information. + loris_file_name = get_loris_bids_file_name(file_path.name, session) + + return ( + import_env.loris_bids_path + / f'sub-{session.candidate.psc_id}' + / f'ses-{session.visit_label}' + / data_type + / loris_file_name + ) + + +def get_loris_bids_file_name(file_name: str, session: DbSession) -> str: + """ + Get the name of a BIDS file in LORIS, replacing or adding the BIDS subject and session labels + with the LORIS PSCID and visit label. + """ + + # Remove the subject and session entities if they are present. + file_name = re.sub(r'sub-[a-zA-Z0-9]+_?', '', file_name) + file_name = re.sub(r'ses-[a-zA-Z0-9]+_?', '', file_name) + + # Add the LORIS subject and session information back in the correct order. + return f'sub-{session.candidate.psc_id}_ses-{session.visit_label}_{file_name}' + + +def copy_loris_bids_file(import_env: BidsImportEnv, file_path: Path, loris_file_path: Path): + """ + Copy a BIDS file to the LORIS data directory, unless the no-copy mode is enabled. + """ + + # Do not copy the file in no-copy mode. + if import_env.loris_bids_path is None: + return + + full_loris_file_path = import_env.data_dir_path / loris_file_path + + if full_loris_file_path.exists(): + raise Exception(f"File '{loris_file_path}' already exists in LORIS.") + + full_loris_file_path.parent.mkdir(parents=True, exist_ok=True) + if file_path.is_file(): + shutil.copyfile(file_path, full_loris_file_path) + elif file_path.is_dir(): + shutil.copytree(file_path, full_loris_file_path) +# TODO: This function is ugly and should be replaced. 
 def copy_scans_tsv_file_to_loris_bids_dir(
     scans_file: BidsScansTsvFile,
-    bids_sub_id: str,
-    loris_bids_root_dir: str,
-    data_dir: str,
+    session: DbSession,
+    loris_bids_root_dir: Path,
+    data_dir: Path,
 ) -> str:
     """
     Copy the scans.tsv file to the LORIS BIDS directory for the subject.
     """
 
     original_file_path = scans_file.path
-    final_file_path = os.path.join(loris_bids_root_dir, f'sub-{bids_sub_id}', scans_file.path.name)
+    loris_file_name = get_loris_bids_file_name(scans_file.path.name, session)
+    final_file_path = (
+        loris_bids_root_dir
+        / f'sub-{session.candidate.psc_id}'
+        / f'ses-{session.visit_label}'
+        / loris_file_name
+    )
 
     # copy the scans.tsv file to the new directory
     if os.path.exists(final_file_path):
diff --git a/python/lib/import_bids_dataset/env.py b/python/lib/import_bids_dataset/env.py
new file mode 100644
index 000000000..2a849fcaf
--- /dev/null
+++ b/python/lib/import_bids_dataset/env.py
@@ -0,0 +1,39 @@
+from dataclasses import dataclass
+from pathlib import Path
+
+
+@dataclass
+class BidsImportEnv:
+    """
+    Information about a specific BIDS import pipeline run.
+    """
+
+    data_dir_path: Path
+    """
+    The LORIS data directory path.
+    """
+
+    source_bids_path: Path
+    """
+    The source BIDS directory path.
+    """
+
+    loris_bids_path: Path | None
+    """
+    The LORIS BIDS directory path for this import, relative to the LORIS data directory.
+    """
+
+    imported_acquisitions_count: int = 0
+    """
+    The number of successfully imported BIDS acquisitions.
+    """
+
+    ignored_acquisitions_count: int = 0
+    """
+    The number of ignored BIDS acquisition imports.
+    """
+
+    failed_acquisitions_count: int = 0
+    """
+    The number of failed BIDS acquisition imports.
+ """ diff --git a/python/lib/import_bids_dataset/mri.py b/python/lib/import_bids_dataset/mri.py new file mode 100644 index 000000000..0ce87e19c --- /dev/null +++ b/python/lib/import_bids_dataset/mri.py @@ -0,0 +1,218 @@ +from pathlib import Path +from typing import Any + +from loris_bids_reader.info import BidsAcquisitionInfo +from loris_bids_reader.mri.acquisition import MriAcquisition +from loris_bids_reader.mri.reader import BidsMriDataTypeReader +from loris_utils.crypto import compute_file_blake2b_hash +from loris_utils.error import group_errors_tuple + +from lib.db.models.mri_scan_type import DbMriScanType +from lib.db.models.session import DbSession +from lib.db.queries.file import try_get_file_with_hash, try_get_file_with_path +from lib.db.queries.mri_scan_type import try_get_mri_scan_type_with_name +from lib.env import Env +from lib.imaging_lib.file import register_mri_file +from lib.imaging_lib.file_parameter import register_mri_file_parameter, register_mri_file_parameters +from lib.imaging_lib.nifti import add_nifti_spatial_file_parameters +from lib.imaging_lib.nifti_pic import create_nifti_preview_picture +from lib.imaging_lib.scan_type import create_mri_scan_type +from lib.import_bids_dataset.acquisitions import import_bids_acquisitions +from lib.import_bids_dataset.copy_files import copy_loris_bids_file, get_loris_bids_file_path +from lib.import_bids_dataset.env import BidsImportEnv +from lib.import_bids_dataset.file_type import get_check_bids_imaging_file_type_from_extension +from lib.import_bids_dataset.mri_sidecar import add_bids_mri_sidecar_file_parameters +from lib.import_bids_dataset.scans import add_bids_scans_file_parameters +from lib.logging import log + +KNOWN_SUFFIXES_PER_MRI_DATA_TYPE = { + 'anat': [ + 'T1w', 'T2w', 'T1rho', 'T1map', 'T2map', 'T2star', 'FLAIR', 'FLASH', 'PD', 'PDmap', 'PDT2', + 'inplaneT1', 'inplaneT2', 'angio', + ], + 'func': [ + 'bold', 'cbv', 'phase', + ], + 'dwi': [ + 'dwi', 'sbref', + ], + 'fmap': [ + 'phasediff', 
'magnitude1', 'magnitude2', 'phase1', 'phase2', 'fieldmap', 'epi', + ], +} + + +def import_bids_mri_data_type( + env: Env, + import_env: BidsImportEnv, + session: DbSession, + data_type: BidsMriDataTypeReader, +): + """ + Import the MRI acquisitions found in a BIDS MRI data type directory. + """ + + import_bids_acquisitions( + env, + import_env, + data_type.acquisitions, + lambda acquisition, bids_info: import_bids_mri_acquisition( + env, + import_env, + session, + acquisition, + bids_info, + ), + ) + + +def import_bids_mri_acquisition( + env: Env, + import_env: BidsImportEnv, + session: DbSession, + acquisition: MriAcquisition, + bids_info: BidsAcquisitionInfo, +): + """ + Import a BIDS NIfTI file and its associated files in LORIS. + """ + + # The files to copy to LORIS, with the source path on the left and the LORIS path on the right. + files_to_copy: list[tuple[Path, Path]] = [] + + loris_file_path = get_loris_bids_file_path(import_env, session, bids_info.data_type, acquisition.nifti_path) + files_to_copy.append((acquisition.nifti_path, loris_file_path)) + + # Check whether the file is already registered in LORIS. + + loris_file = try_get_file_with_path(env.db, loris_file_path) + if loris_file is not None: + import_env.ignored_acquisitions_count += 1 + log(env, f"File '{loris_file_path}' is already registered in LORIS. Skipping.") + return + + # Get information about the file. + + file_type, file_hash, scan_type = group_errors_tuple( + f"Error while checking database information for MRI acquisition '{bids_info.name}'.", + lambda: get_check_bids_imaging_file_type_from_extension(env, acquisition.nifti_path), + lambda: get_check_bids_nifti_file_hash(env, acquisition), + lambda: get_check_bids_nifti_mri_scan_type(env, bids_info), + ) + + # Get the auxiliary files. + + # The auxiliary files to the NIfTI file and its sidecar, with the file type on the left and the + # file path on the right. 
+ aux_file_paths: list[tuple[str, Path]] = [] + + if acquisition.bval_path is not None: + aux_file_paths.append(('bval', acquisition.bval_path)) + + if acquisition.bvec_path is not None: + aux_file_paths.append(('bvec', acquisition.bvec_path)) + + if acquisition.physio_path is not None: + aux_file_paths.append(('physio', acquisition.physio_path)) + + if acquisition.events_path is not None: + aux_file_paths.append(('events', acquisition.events_path)) + + # Get the file parameters. + + file_parameters: dict[str, Any] = {} + + if acquisition.sidecar_file is not None: + add_bids_mri_sidecar_file_parameters(env, acquisition.sidecar_file, file_parameters) + json_loris_path = get_loris_bids_file_path( + import_env, + session, + bids_info.data_type, + acquisition.sidecar_file.path, + ) + + files_to_copy.append((acquisition.sidecar_file.path, json_loris_path)) + file_parameters['bids_json_file'] = json_loris_path + file_parameters['bids_json_file_blake2b_hash'] = compute_file_blake2b_hash(acquisition.sidecar_file.path) + + add_nifti_spatial_file_parameters(acquisition.nifti_path, file_parameters) + file_parameters['file_blake2b_hash'] = file_hash + + if bids_info.scans_file is not None and bids_info.scan_row is not None: + add_bids_scans_file_parameters(bids_info.scans_file, bids_info.scan_row, file_parameters) + + for aux_file_type, aux_file_path in aux_file_paths: + aux_file_hash = compute_file_blake2b_hash(aux_file_path) + aux_file_loris_path = get_loris_bids_file_path(import_env, session, bids_info.data_type, aux_file_path) + files_to_copy.append((aux_file_path, aux_file_loris_path)) + file_parameters[f'bids_{aux_file_type}'] = str(aux_file_loris_path) + file_parameters[f'bids_{aux_file_type}_blake2b_hash'] = aux_file_hash + + # Copy the files on the file system. + for copied_file_path, loris_copied_file_path in files_to_copy: + copy_loris_bids_file(import_env, copied_file_path, loris_copied_file_path) + + # Register the file and its parameters in the database. 
+
+    file = register_mri_file(
+        env,
+        loris_file_path,
+        file_type,
+        session,
+        scan_type,
+        None,
+        None,
+        file_parameters.get('SeriesInstanceUID'),
+        file_parameters.get('EchoTime'),
+        file_parameters.get('EchoNumber'),
+        file_parameters.get('PhaseEncodingDirection'),
+        bids_info.scan_row.get_acquisition_time() if bids_info.scan_row is not None else None,
+        False,
+    )
+
+    register_mri_file_parameters(env, file, file_parameters)
+
+    env.db.commit()
+
+    # Create and register the file picture.
+
+    pic_rel_path = create_nifti_preview_picture(env, file)
+
+    register_mri_file_parameter(env, file, 'check_pic_filename', str(pic_rel_path))
+
+    env.db.commit()
+
+
+def get_check_bids_nifti_file_hash(env: Env, acquisition: MriAcquisition) -> str:
+    """
+    Compute the BLAKE2b hash of a NIfTI file and raise an exception if that hash is already
+    registered in the database.
+    """
+
+    file_hash = compute_file_blake2b_hash(acquisition.nifti_path)
+
+    file = try_get_file_with_hash(env.db, file_hash)
+    if file is not None:
+        raise Exception(f"File with hash '{file_hash}' already present in the database.")
+
+    return file_hash
+
+
+def get_check_bids_nifti_mri_scan_type(env: Env, bids_info: BidsAcquisitionInfo) -> DbMriScanType:
+    """
+    Get the MRI scan type corresponding to a BIDS MRI acquisition using its BIDS suffix. Create the
+    MRI scan type in the database if the suffix is a standard BIDS suffix and the scan type does not
+    already exist in the database, or raise an exception if no known scan type is found.
+ """ + + if bids_info.suffix is None: + raise Exception("No BIDS suffix found in the NIfTI file name, cannot infer the file data type.") + + mri_scan_type = try_get_mri_scan_type_with_name(env.db, bids_info.suffix) + if mri_scan_type is not None: + return mri_scan_type + + if bids_info.suffix not in KNOWN_SUFFIXES_PER_MRI_DATA_TYPE[bids_info.data_type]: + raise Exception(f"Found unknown MRI file suffix '{bids_info.suffix}'.") + + return create_mri_scan_type(env, bids_info.suffix) diff --git a/python/lib/import_bids_dataset/scans.py b/python/lib/import_bids_dataset/scans.py new file mode 100644 index 000000000..c7bef44c5 --- /dev/null +++ b/python/lib/import_bids_dataset/scans.py @@ -0,0 +1,20 @@ +from typing import Any + +from loris_bids_reader.files.scans import BidsScansTsvFile, BidsScanTsvRow +from loris_utils.crypto import compute_file_blake2b_hash + + +def add_bids_scans_file_parameters( + scans_file: BidsScansTsvFile, + scan_row: BidsScanTsvRow, + file_parameters: dict[str, Any], +): + """ + Read a BIDS `scans.tsv` file and row, and add its information to the LORIS file parameters + dictionary. 
+ """ + + file_parameters['scan_acquisition_time'] = scan_row.get_acquisition_time() + file_parameters['age_at_scan'] = scan_row.get_age_at_scan() + file_parameters['scans_tsv_file'] = scans_file.path + file_parameters['scans_tsv_file_bake2hash'] = compute_file_blake2b_hash(scans_file.path) diff --git a/python/lib/mri.py b/python/lib/mri.py deleted file mode 100644 index 9017e37d9..000000000 --- a/python/lib/mri.py +++ /dev/null @@ -1,399 +0,0 @@ -"""Deals with MRI BIDS datasets and register them into the database.""" - -import getpass -import os -import re -import sys -from pathlib import Path - -from loris_bids_reader.files.scans import BidsScansTsvFile -from loris_bids_reader.mri.sidecar import BidsMriSidecarJsonFile -from loris_utils.crypto import compute_file_blake2b_hash - -import lib.exitcode -import lib.utilities as utilities -from lib.db.models.session import DbSession -from lib.env import Env -from lib.imaging import Imaging -from lib.import_bids_dataset.copy_files import copy_scans_tsv_file_to_loris_bids_dir -from lib.import_bids_dataset.file_type import get_check_bids_imaging_file_type_from_extension - - -class Mri: - """ - This class reads the BIDS MRI data structure and registers the MRI datasets into the - database by calling lib.imaging class. 
- - :Example: - - from lib.mri import Mri - from lib.database import Database - - # database connection - db = Database(config_file.mysql, verbose) - db.connect() - - # grep config settings from the Config module - config_obj = Config(db, verbose) - default_bids_vl = config_obj.get_config('default_bids_vl') - data_dir = config_obj.get_config('dataDirBasepath') - - # create the LORIS_BIDS directory in data_dir based on Name and BIDS version - loris_bids_root_dir = create_loris_bids_directory( - bids_reader, data_dir, verbose - ) - for row in bids_reader.cand_session_modalities_list: - for modality in row['modalities']: - if modality in ['anat', 'dwi', 'fmap', 'func']: - bids_session = row['bids_ses_id'] - visit_label = bids_session if bids_session else default_bids_vl - loris_bids_mri_rel_dir = "sub-" + row['bids_sub_id'] + "/" + \ - "ses-" + visit_label + "/mri/" - lib.utilities.create_dir( - loris_bids_root_dir + loris_bids_mri_rel_dir, verbose - ) - Mri( - env = env, - bids_layout = bids_layout, - session = session, - bids_sub_id = row['bids_sub_id'], - bids_ses_id = row['bids_ses_id'], - bids_modality = modality, - db = db, - verbose = verbose, - data_dir = data_dir, - default_visit_label = default_bids_vl, - loris_bids_eeg_rel_dir = loris_bids_mri_rel_dir, - loris_bids_root_dir = loris_bids_root_dir - ) - - # disconnect from the database - db.disconnect() - """ - - def __init__(self, env: Env, bids_layout, session: DbSession, bids_sub_id, bids_ses_id, bids_modality, db, - verbose, data_dir, default_visit_label, - loris_bids_mri_rel_dir, loris_bids_root_dir): - - # enumerate the different suffixes supported by BIDS per modality type - self.possible_suffix_per_modality = { - 'anat' : [ - 'T1w', 'T2w', 'T1rho', 'T1map', 'T2map', 'T2star', 'FLAIR', - 'FLASH', 'PD', 'PDmap', 'PDT2', 'inplaneT1', 'inplaneT2', 'angio' - ], - 'func' : [ - 'bold', 'cbv', 'phase' - ], - 'dwi' : [ - 'dwi', 'sbref' - ], - 'fmap' : [ - 'phasediff', 'magnitude1', 'magnitude2', 'phase1', 
'phase2', - 'fieldmap', 'epi' - ] - } - - self.env = env - - # load bids objects - self.bids_layout = bids_layout - - # load the LORIS BIDS import root directory where the files will be copied - self.loris_bids_mri_rel_dir = loris_bids_mri_rel_dir - self.loris_bids_root_dir = loris_bids_root_dir - self.data_dir = data_dir - - # load BIDS subject, visit and modality - self.bids_sub_id = bids_sub_id - self.bids_ses_id = bids_ses_id - self.bids_modality = bids_modality - - # load database handler object and verbose bool - self.db = db - self.verbose = verbose - - # find corresponding CandID and SessionID in LORIS - self.session = session - self.default_vl = default_visit_label - - # grep all the NIfTI files for the modality - self.nifti_files = self.grep_nifti_files() - - # check if a tsv with acquisition dates or age is available for the subject - self.scans_file = None - if self.bids_layout.get(suffix='scans', subject=self.session.candidate.psc_id, return_type='filename'): - scans_file_path = self.bids_layout.get(suffix='scans', subject=self.session.candidate.psc_id, - return_type='filename', extension='tsv')[0] - self.scans_file = BidsScansTsvFile(Path(scans_file_path)) - - # loop through NIfTI files and register them in the DB - for nifti_file in self.nifti_files: - self.register_raw_file(nifti_file) - - def grep_nifti_files(self): - """ - Returns the list of NIfTI files found for the modality. 
- - :return: list of NIfTI files found for the modality - :rtype: list - """ - - # grep all the possible suffixes for the modality - modality_possible_suffix = self.possible_suffix_per_modality[self.bids_modality] - - # loop through the possible suffixes and grep the NIfTI files - nii_files_list = [] - for suffix in modality_possible_suffix: - nii_files_list.extend(self.grep_bids_files(suffix, 'nii.gz')) - - # return the list of found NIfTI files - return nii_files_list - - def grep_bids_files(self, bids_type, extension): - """ - Greps the BIDS files and their layout information from the BIDSLayout - and return that list. - - :param bids_type: the BIDS type to use to grep files (T1w, T2w, bold, dwi...) - :type bids_type: str - :param extension: extension of the file to look for (nii.gz, json...) - :type extension: str - - :return: list of files from the BIDS layout - :rtype: list - """ - - if self.bids_ses_id: - return self.bids_layout.get( - subject = self.bids_sub_id, - session = self.bids_ses_id, - datatype = self.bids_modality, - extension = extension, - suffix = bids_type - ) - else: - return self.bids_layout.get( - subject = self.bids_sub_id, - datatype = self.bids_modality, - extension = extension, - suffix = bids_type - ) - - def register_raw_file(self, nifti_file): - """ - Registers raw MRI files and related files into the files and parameter_file tables. - - :param nifti_file: NIfTI file object - :type nifti_file: pybids NIfTI file object - """ - - # insert the NIfTI file - self.fetch_and_insert_nifti_file(nifti_file) - - def fetch_and_insert_nifti_file(self, nifti_file, derivatives=None): - """ - Gather NIfTI file information to insert into the files and parameter_file tables. - Once all the information has been gathered, it will call imaging.insert_imaging_file - that will perform the insertion into the files and parameter_file tables. 
- - :param nifti_file : NIfTI file object - :type nifti_file : pybids NIfTI file object - :param derivatives: whether the file to be registered is a derivative file - :type derivatives: bool - - :return: dictionary with the inserted file_id and file_path - :rtype: dict - """ - - # load the Imaging object that will be used to insert the imaging data into the database - imaging = Imaging(self.db, self.verbose) - - # load the list of associated files with the NIfTI file - associated_files = nifti_file.get_associations() - - # load the entity information from the NIfTI file - entities = nifti_file.get_entities() - scan_type = entities['suffix'] - - # loop through the associated files to grep JSON, bval, bvec... - sidecar_json = None - other_assoc_files = {} - for assoc_file in associated_files: - file_info = assoc_file.get_entities() - if re.search(r'json$', file_info['extension']): - sidecar_json = BidsMriSidecarJsonFile(Path(assoc_file.path)) - elif re.search(r'bvec$', file_info['extension']): - other_assoc_files['bvec_file'] = assoc_file.path - elif re.search(r'bval$', file_info['extension']): - other_assoc_files['bval_file'] = assoc_file.path - elif re.search(r'tsv$', file_info['extension']) and file_info['suffix'] == 'events': - other_assoc_files['task_file'] = assoc_file.path - elif re.search(r'tsv$', file_info['extension']) and file_info['suffix'] == 'physio': - other_assoc_files['physio_file'] = assoc_file.path - - # read the json file if it exists - file_parameters = {} - if sidecar_json is not None: - file_parameters = imaging.map_bids_param_to_loris_param(sidecar_json.data) - # copy the JSON file to the LORIS BIDS import directory - json_path = self.copy_file_to_loris_bids_dir(sidecar_json.path) - file_parameters['bids_json_file'] = json_path - json_blake2 = compute_file_blake2b_hash(sidecar_json.path) - file_parameters['bids_json_file_blake2b_hash'] = json_blake2 - - # grep the file type from the ImagingFileTypes table - file_type = 
get_check_bids_imaging_file_type_from_extension(self.env, Path(nifti_file.filename)) - - # determine the output type - output_type = 'derivatives' if derivatives else 'native' - if not derivatives: - coordinate_space = 'native' - - # get the acquisition date of the MRI or the age at the time of acquisition - if self.scans_file is not None: - scan_info = self.scans_file.get_row(Path(nifti_file.path)) - if scan_info is not None: - try: - file_parameters['scan_acquisition_time'] = scan_info.get_acquisition_time() - file_parameters['age_at_scan'] = scan_info.get_age_at_scan() - except Exception as error: - print(f"ERROR: {error}") - sys.exit(lib.exitcode.PROGRAM_EXECUTION_FAILURE) - - # copy the scans.tsv file to the LORIS BIDS import directory - scans_path = copy_scans_tsv_file_to_loris_bids_dir( - self.scans_file, - self.bids_sub_id, - self.loris_bids_root_dir, - self.data_dir, - ) - - file_parameters['scans_tsv_file'] = scans_path - scans_blake2 = compute_file_blake2b_hash(self.scans_file.path) - file_parameters['scans_tsv_file_bake2hash'] = scans_blake2 - - # grep voxel step from the NIfTI file header - step_parameters = imaging.get_nifti_image_step_parameters(nifti_file.path) - file_parameters['xstep'] = step_parameters[0] - file_parameters['ystep'] = step_parameters[1] - file_parameters['zstep'] = step_parameters[2] - - # grep the time length from the NIfTI file header - is_4d_dataset = False - length_parameters = imaging.get_nifti_image_length_parameters(nifti_file.path) - if len(length_parameters) == 4: - file_parameters['time'] = length_parameters[3] - is_4d_dataset = True - - # add all other associated files to the file_parameters so they get inserted - # in parameter_file - for type in other_assoc_files: - original_file_path = other_assoc_files[type] - copied_path = self.copy_file_to_loris_bids_dir(original_file_path) - file_param_name = 'bids_' + type - file_parameters[file_param_name] = copied_path - file_blake2 = 
compute_file_blake2b_hash(original_file_path) - hash_param_name = file_param_name + '_blake2b_hash' - file_parameters[hash_param_name] = file_blake2 - - # append the blake2b to the MRI file parameters dictionary - blake2 = compute_file_blake2b_hash(nifti_file.path) - file_parameters['file_blake2b_hash'] = blake2 - - # check that the file is not already inserted before inserting it - result = imaging.grep_file_info_from_hash(blake2) - file_id = result['FileID'] if result else None - file_path = result['File'] if result else None - if not file_id: - # grep the scan type ID from the mri_scan_type table (if it is not already in - # the table, it will add a row to the mri_scan_type table) - scan_type_id = self.db.grep_id_from_lookup_table( - id_field_name = 'MriScanTypeID', - table_name = 'mri_scan_type', - where_field_name = 'MriScanTypeName', - where_value = scan_type, - insert_if_not_found = True - ) - - # copy the NIfTI file to the LORIS BIDS import directory - file_path = self.copy_file_to_loris_bids_dir(nifti_file.path) - - # insert the file along with its information into files and parameter_file tables - echo_time = file_parameters['EchoTime'] if 'EchoTime' in file_parameters.keys() else None - echo_nb = file_parameters['EchoNumber'] if 'EchoNumber' in file_parameters.keys() else None - phase_enc_dir = file_parameters['PhaseEncodingDirection'] \ - if 'PhaseEncodingDirection' in file_parameters.keys() else None - file_info = { - 'FileType' : file_type.name, - 'File' : file_path, - 'SessionID' : self.session.id, - 'InsertedByUserID': getpass.getuser(), - 'CoordinateSpace' : coordinate_space, - 'OutputType' : output_type, - 'EchoTime' : echo_time, - 'PhaseEncodingDirection': phase_enc_dir, - 'EchoNumber' : echo_nb, - 'SourceFileID' : None, - 'MriScanTypeID' : scan_type_id - } - file_id = imaging.insert_imaging_file(file_info, file_parameters) - - # create the pic associated with the file - pic_rel_path = imaging.create_imaging_pic( - { - 'cand_id' : 
self.session.candidate.cand_id, - 'data_dir_path': self.data_dir, - 'file_rel_path': file_path, - 'is_4D_dataset': is_4d_dataset, - 'file_id' : file_id - } - ) - if os.path.exists(os.path.join(self.data_dir, 'pic/', pic_rel_path)): - imaging.insert_parameter_file(file_id, 'check_pic_filename', pic_rel_path) - - return {'file_id': file_id, 'file_path': file_path} - - def copy_file_to_loris_bids_dir(self, file, derivatives_path=None): - """ - Wrapper around the utilities.copy_file function that copies the file - to the LORIS BIDS import directory and returns the relative path of the - file (without the data_dir part). - - :param file: full path to the original file - :type file: str - :param derivatives_path: path to the derivative folder - :type derivatives_path: str - - :return: relative path to the copied file - :rtype: str - """ - - # determine the path of the copied file - copy_file = self.loris_bids_mri_rel_dir - if self.bids_ses_id: - copy_file += os.path.basename(file) - else: - # make sure the ses- is included in the new filename if using - # default visit label from the LORIS config - copy_file += str.replace( - os.path.basename(file), - "sub-" + self.bids_sub_id, - "sub-" + self.bids_sub_id + "_ses-" + self.default_vl - ) - if derivatives_path: - # create derivative subject/vl/modality directory - lib.utilities.create_dir( - derivatives_path + self.loris_bids_mri_rel_dir, - self.verbose - ) - copy_file = derivatives_path + copy_file - else: - copy_file = self.loris_bids_root_dir + copy_file - - # copy the file - utilities.copy_file(file, copy_file, self.verbose) - - # determine the relative path and return it - relative_path = copy_file.replace(self.data_dir, "") - - return relative_path diff --git a/python/lib/physio/chunking.py b/python/lib/physio/chunking.py index 6cd34dc1c..8760c5775 100644 --- a/python/lib/physio/chunking.py +++ b/python/lib/physio/chunking.py @@ -3,7 +3,7 @@ from loris_utils.path import get_path_stem import lib.exitcode -from 
lib.config import get_data_dir_path_config, get_eeg_chunks_dir_path_config +from lib.config import get_data_dir_path_config, get_ephys_chunks_dir_path_config from lib.db.models.physio_file import DbPhysioFile from lib.db.queries.physio_parameter import try_get_physio_file_parameter_with_file_id_name from lib.env import Env @@ -89,11 +89,11 @@ def get_dataset_chunks_dir_path(env: Env, physio_file: DbPhysioFile): # The first part of the physiological file path is assumed to be the BIDS imports directory # name. The second part of the physiological file path is assumed to be the dataset name. - eeg_chunks_dir_path = get_eeg_chunks_dir_path_config(env) - if eeg_chunks_dir_path is None: + ephys_chunks_dir_path = get_ephys_chunks_dir_path_config(env) + if ephys_chunks_dir_path is None: data_dir_path = get_data_dir_path_config(env) - eeg_chunks_dir_path = data_dir_path / physio_file.path.parts[0] + ephys_chunks_dir_path = data_dir_path / physio_file.path.parts[0] - eeg_chunks_path = eeg_chunks_dir_path / f'{physio_file.path.parts[1]}_chunks' + eeg_chunks_path = ephys_chunks_dir_path / f'{physio_file.path.parts[1]}_chunks' eeg_chunks_path.mkdir(exist_ok=True) return eeg_chunks_path diff --git a/python/lib/physiological.py b/python/lib/physiological.py index 6b8d8022c..068172da0 100644 --- a/python/lib/physiological.py +++ b/python/lib/physiological.py @@ -774,25 +774,3 @@ def insert_event_file(self, events_file: BidsEventsTsvFile, event_file, physiolo ) # insert blake2b hash of task event file into physiological_parameter_file insert_physio_file_parameter(self.env, physiological_file, 'event_file_blake2b_hash', blake2) - - def insert_archive_file(self, archive_info): - """ - Inserts the archive file of all physiological files (including - electrodes.tsv, channels.tsv and events.tsv) in the - physiological_archive table of the database. 
- - :param archive_info: dictionary with key/value pairs to insert - :type archive_info: dict - """ - - # insert the archive into the physiological_archive table - archive_fields = () - archive_values = () - for key, value in archive_info.items(): - archive_fields = (*archive_fields, key) - archive_values = (*archive_values, value) - self.db.insert( - table_name = 'physiological_archive', - column_names = archive_fields, - values = archive_values - ) diff --git a/python/lib/utilities.py b/python/lib/utilities.py index 837048474..43ce61273 100644 --- a/python/lib/utilities.py +++ b/python/lib/utilities.py @@ -137,6 +137,7 @@ def create_dir(dir_name, verbose): return dir_name +@deprecated('Use `loris_utils.archive.create_archive_with_files` instead') def create_archive(files_to_archive, archive_path): """ Creates an archive with the files listed in the files_to_archive tuple. diff --git a/python/loris_bids_reader/src/loris_bids_reader/info.py b/python/loris_bids_reader/src/loris_bids_reader/info.py index c84a4a369..e106ce6aa 100644 --- a/python/loris_bids_reader/src/loris_bids_reader/info.py +++ b/python/loris_bids_reader/src/loris_bids_reader/info.py @@ -1,6 +1,7 @@ from dataclasses import dataclass from loris_bids_reader.files.participants import BidsParticipantTsvRow +from loris_bids_reader.files.scans import BidsScansTsvFile, BidsScanTsvRow @dataclass @@ -31,6 +32,11 @@ class BidsSessionInfo(BidsSubjectInfo): The BIDS session label. """ + scans_file: BidsScansTsvFile | None + """ + The BIDS `scans.tsv` file of this session, if any. + """ + @dataclass class BidsDataTypeInfo(BidsSessionInfo): @@ -42,3 +48,25 @@ class BidsDataTypeInfo(BidsSessionInfo): """ The BIDS data type name. """ + + +@dataclass +class BidsAcquisitionInfo(BidsDataTypeInfo): + """ + Information about a BIDS acquisition. + """ + + name: str + """ + The name of this acquisition (usually the file name without the extension). 
+ """ + + suffix: str | None + """ + The BIDS suffix of this acquisition, if any. + """ + + scan_row: BidsScanTsvRow | None + """ + The BIDS `scans.tsv` row of this acquisition, if any. + """ diff --git a/python/loris_bids_reader/src/loris_bids_reader/mri/acquisition.py b/python/loris_bids_reader/src/loris_bids_reader/mri/acquisition.py new file mode 100644 index 000000000..a9c071397 --- /dev/null +++ b/python/loris_bids_reader/src/loris_bids_reader/mri/acquisition.py @@ -0,0 +1,41 @@ +from dataclasses import dataclass +from pathlib import Path + +from loris_bids_reader.mri.sidecar import BidsMriSidecarJsonFile + + +@dataclass +class MriAcquisition: + """ + An MRI acquisition and its related files. + """ + + nifti_path: Path + """ + The main NIfTI file path. + """ + + sidecar_file: BidsMriSidecarJsonFile | None + """ + The related JSON sidecar file path, if it exists. + """ + + bval_path: Path | None + """ + The related bval file path, if it exists. + """ + + bvec_path: Path | None + """ + The related bvec file path, if it exists. + """ + + physio_path: Path | None + """ + The related physio file path, if it exists. + """ + + events_path: Path | None + """ + The related events file path, if it exists. 
+ """ diff --git a/python/loris_bids_reader/src/loris_bids_reader/mri/reader.py b/python/loris_bids_reader/src/loris_bids_reader/mri/reader.py new file mode 100644 index 000000000..0cef6077e --- /dev/null +++ b/python/loris_bids_reader/src/loris_bids_reader/mri/reader.py @@ -0,0 +1,81 @@ + +from dataclasses import dataclass +from functools import cached_property +from pathlib import Path + +from bids.layout import BIDSFile +from loris_utils.path import remove_path_extension + +from loris_bids_reader.info import BidsAcquisitionInfo +from loris_bids_reader.mri.acquisition import MriAcquisition +from loris_bids_reader.mri.sidecar import BidsMriSidecarJsonFile +from loris_bids_reader.reader import BidsDataTypeReader +from loris_bids_reader.utils import find_pybids_file_path, get_pybids_file_path + + +@dataclass +class BidsMriDataTypeReader(BidsDataTypeReader): + @cached_property + def acquisitions(self) -> list[tuple[MriAcquisition, BidsAcquisitionInfo]]: + pybids_layout = self.session.subject.dataset.layout + pybids_files: list[BIDSFile] = pybids_layout.get( # type: ignore + subject = self.session.subject.label, + session = self.session.label, + datatype = self.name, + extension = ['.nii', '.nii.gz'], + ) + + acquisitions: list[tuple[MriAcquisition, BidsAcquisitionInfo]] = [] + for pybids_file in pybids_files: + nifti_path = get_pybids_file_path(pybids_file) + + # Get all associated files + associations: list[BIDSFile] = pybids_file.get_associations() # type: ignore + + # Find associated files using predicates + sidecar_path = find_pybids_file_path(associations, lambda file: file.entities.get('extension') == '.json') + + pybids_bval_path = pybids_layout.get_nearest(pybids_file, extension='.bval') # type: ignore + bval_path = Path(pybids_bval_path) if pybids_bval_path is not None else None # type: ignore + + pybids_bvec_path = pybids_layout.get_nearest(pybids_file, extension='.bvec') # type: ignore + bvec_path = Path(pybids_bvec_path) if pybids_bvec_path is not None 
else None # type: ignore + + events_path = find_pybids_file_path( + associations, + lambda file: file.entities.get('suffix') == 'events' and file.entities.get('extension') == '.tsv', + ) + + physio_path = find_pybids_file_path( + associations, + lambda file: file.entities.get('suffix') in ['physio', 'stim'] + and file.entities.get('extension') in ['.tsv.gz', '.tsv'], + ) + + sidecar_file = BidsMriSidecarJsonFile(sidecar_path) if sidecar_path is not None else None + scan_row = self.session.scans_file.get_row(nifti_path) if self.session.scans_file is not None else None + acquisition_name = remove_path_extension(nifti_path).name + + bids_info = BidsAcquisitionInfo( + subject = self.session.subject.label, + participant_row = self.session.subject.participant_row, + session = self.session.label, + scans_file = self.session.scans_file, + data_type = self.name, + scan_row = scan_row, + name = acquisition_name, + suffix = pybids_file.entities.get('suffix'), + ) + + acquisition = MriAcquisition( + nifti_path = nifti_path, + sidecar_file = sidecar_file, + bval_path = bval_path, + bvec_path = bvec_path, + physio_path = physio_path, + events_path = events_path, + ) + + acquisitions.append((acquisition, bids_info)) + + return acquisitions diff --git a/python/loris_bids_reader/src/loris_bids_reader/reader.py b/python/loris_bids_reader/src/loris_bids_reader/reader.py index eeb15b402..7eaa8612e 100644 --- a/python/loris_bids_reader/src/loris_bids_reader/reader.py +++ b/python/loris_bids_reader/src/loris_bids_reader/reader.py @@ -1,14 +1,21 @@ import re +from collections.abc import Sequence from dataclasses import dataclass from functools import cached_property from pathlib import Path +from typing import TYPE_CHECKING from bids import BIDSLayout, BIDSLayoutIndexer from loris_bids_reader.files.dataset_description import BidsDatasetDescriptionJsonFile from loris_bids_reader.files.participants import BidsParticipantsTsvFile, BidsParticipantTsvRow +from loris_bids_reader.files.scans 
import BidsScansTsvFile from loris_bids_reader.info import BidsDataTypeInfo, BidsSessionInfo, BidsSubjectInfo +# Circular imports +if TYPE_CHECKING: + from loris_bids_reader.mri.reader import BidsMriDataTypeReader + PYBIDS_IGNORE = ['.git', 'code/', 'log/', 'sourcedata/'] PYBIDS_FORCE_INDEX = [re.compile(r"_annotations\.(tsv|json)$")] @@ -208,9 +215,42 @@ class BidsSessionReader: """ @cached_property - def data_types(self) -> list['BidsDataTypeReader']: + def scans_file(self) -> BidsScansTsvFile | None: + scans_paths: list[str] = self.subject.dataset.layout.get( # type: ignore + subject=self.subject.label, + session=self.label, + suffix='scans', + return_type='filename', + ) + + if scans_paths == []: + return None + + return BidsScansTsvFile(Path(scans_paths[0])) + + @cached_property + def mri_data_types(self) -> list['BidsMriDataTypeReader']: """ - Get the data type directory readers of this session. + Get the MRI data type directory readers of this session. + """ + + from loris_bids_reader.mri.reader import BidsMriDataTypeReader + + return [ + BidsMriDataTypeReader( + session=self, + name=data_type, # type: ignore + ) for data_type in self.subject.dataset.layout.get_datatypes( # type: ignore + subject=self.subject.label, + session=self.label, + datatype=['anat', 'dwi', 'fmap', 'func'], + ) + ] + + @cached_property + def eeg_data_types(self) -> list['BidsDataTypeReader']: + """ + Get the EEG data type directory readers of this session. """ return [ @@ -220,9 +260,18 @@ def data_types(self) -> list['BidsDataTypeReader']: ) for data_type in self.subject.dataset.layout.get_datatypes( # type: ignore subject=self.subject.label, session=self.label, + datatype=['eeg', 'ieeg'], ) ] + @cached_property + def data_types(self) -> Sequence['BidsDataTypeReader']: + """ + Get all the data type directory readers of this session. 
+ """ + + return self.eeg_data_types + self.mri_data_types + @cached_property def info(self) -> BidsSessionInfo: """ @@ -233,6 +282,7 @@ def info(self) -> BidsSessionInfo: subject = self.subject.label, participant_row = self.subject.participant_row, session = self.label, + scans_file = self.scans_file, ) @@ -262,5 +312,6 @@ def info(self) -> BidsDataTypeInfo: subject = self.session.subject.label, participant_row = self.session.subject.participant_row, session = self.session.label, + scans_file = self.session.scans_file, data_type = self.name, ) diff --git a/python/loris_bids_reader/src/loris_bids_reader/utils.py b/python/loris_bids_reader/src/loris_bids_reader/utils.py new file mode 100644 index 000000000..9d633f697 --- /dev/null +++ b/python/loris_bids_reader/src/loris_bids_reader/utils.py @@ -0,0 +1,41 @@ +from collections.abc import Callable +from pathlib import Path +from typing import Any + +from bids import BIDSLayout +from bids.layout import BIDSFile +from loris_utils.iter import find + + +def try_get_pybids_value(layout: BIDSLayout, **args: Any) -> Any | None: + """ + Get zero or one PyBIDS value using the provided arguments, or raise an exception if multiple + values are found. + """ + + match layout.get(args): # type: ignore + case []: + return None + case [value]: # type: ignore + return value # type: ignore + case values: # type: ignore + raise Exception(f"Expected one or zero PyBIDS value but found {len(values)}.") # type: ignore + + +def get_pybids_file_path(file: BIDSFile) -> Path: + """ + Get the path of a PyBIDS file. + """ + + # The PyBIDS file class does not use the standard path object nor supports type checking. + return Path(file.path) # type: ignore + + +def find_pybids_file_path(files: list[BIDSFile], predicate: Callable[[BIDSFile], bool]) -> Path | None: + """ + Find the path of a file in a list of PyBIDS files using a predicate, or return `None` if no + file matches the predicate. 
+ """ + + file = find(files, predicate) + return get_pybids_file_path(file) if file is not None else None diff --git a/python/loris_utils/src/loris_utils/archive.py b/python/loris_utils/src/loris_utils/archive.py new file mode 100644 index 000000000..87431f957 --- /dev/null +++ b/python/loris_utils/src/loris_utils/archive.py @@ -0,0 +1,13 @@ +import tarfile +from pathlib import Path + + +def create_archive_with_files(archive_path: Path, file_paths: list[Path]): + """ + Create a tar archive with the provided files. Files are added to the archive using their base + name, so the name of the provided files should all be distinct. + """ + + with tarfile.open(archive_path, 'w:gz') as tar: + for file_path in file_paths: + tar.add(file_path, arcname=file_path.name) diff --git a/python/scripts/bids_import.py b/python/scripts/bids_import.py index 840cf3e79..b2083b3af 100755 --- a/python/scripts/bids_import.py +++ b/python/scripts/bids_import.py @@ -10,6 +10,7 @@ from pathlib import Path from loris_bids_reader.files.participants import BidsParticipantsTsvFile +from loris_bids_reader.mri.reader import BidsMriDataTypeReader from loris_bids_reader.reader import BidsDatasetReader from loris_utils.crypto import compute_file_blake2b_hash @@ -27,8 +28,9 @@ from lib.env import Env from lib.import_bids_dataset.check_sessions import check_or_create_bids_sessions from lib.import_bids_dataset.check_subjects import check_or_create_bids_subjects +from lib.import_bids_dataset.env import BidsImportEnv +from lib.import_bids_dataset.mri import import_bids_mri_data_type from lib.make_env import make_env -from lib.mri import Mri def main(): @@ -291,6 +293,12 @@ def read_and_insert_bids( hed_union=hed_union ) + import_env = BidsImportEnv( + data_dir_path = Path(data_dir), + source_bids_path = Path(bids_dir), + loris_bids_path = Path(loris_bids_root_dir).relative_to(data_dir) if loris_bids_root_dir is not None else None, + ) + # read list of modalities per session / candidate and register data for 
data_type_reader in bids_reader.data_types: bids_info = data_type_reader.info @@ -310,35 +318,20 @@ def read_and_insert_bids( session = try_get_session_with_cand_id_visit_label(env.db, candidate.cand_id, visit_label) - match bids_info.data_type: - case 'eeg' | 'ieeg': + match (data_type_reader, bids_info.data_type): + case (_, 'eeg' | 'ieeg'): Eeg( env, - bids_layout = bids_reader.layout, - session = session, - bids_info = bids_info, - db = db, - data_dir = data_dir, - loris_bids_eeg_rel_dir = loris_bids_data_type_rel_dir, - loris_bids_root_dir = loris_bids_root_dir, - dataset_tag_dict = dataset_tag_dict, - dataset_type = type - ) - case 'anat' | 'dwi' | 'fmap' | 'func': - Mri( - env, - bids_layout = bids_reader.layout, - session = session, - bids_sub_id = bids_info.subject, - bids_ses_id = bids_info.session, - bids_modality = bids_info.data_type, - db = db, - verbose = verbose, - data_dir = data_dir, - default_visit_label = default_bids_vl, - loris_bids_mri_rel_dir = loris_bids_data_type_rel_dir, - loris_bids_root_dir = loris_bids_root_dir + import_env, + bids_layout = bids_reader.layout, + session = session, + bids_info = bids_info, + db = db, + dataset_tag_dict = dataset_tag_dict, + dataset_type = type ) + case (BidsMriDataTypeReader(), _): + import_bids_mri_data_type(env, import_env, session, data_type_reader) case _: print(f"Data type {bids_info.data_type} is not supported. 
Skipping.") diff --git a/python/tests/integration/scripts/test_import_bids_dataset.py b/python/tests/integration/scripts/test_import_bids_dataset.py index 7400a7ffb..f8b4104f9 100644 --- a/python/tests/integration/scripts/test_import_bids_dataset.py +++ b/python/tests/integration/scripts/test_import_bids_dataset.py @@ -37,7 +37,15 @@ def test_import_eeg_bids_dataset(): db, Path('bids_imports/Face13_BIDSVersion_1.1.0/sub-OTT166/ses-V1/eeg/sub-OTT166_ses-V1_task-faceO_eeg.edf'), ) + assert file is not None + assert file.archive is not None + assert file.event_archive is not None + + assert file.archive.path == \ + Path('bids_imports/Face13_BIDSVersion_1.1.0/sub-OTT166/ses-V1/eeg/sub-OTT166_ses-V1_task-faceO_eeg.tgz') + assert file.event_archive.path == \ + Path('bids_imports/Face13_BIDSVersion_1.1.0/sub-OTT166/ses-V1/eeg/sub-OTT166_ses-V1_task-faceO_events.tgz') # Check that the physiological file parameters has been inserted in the database. file_parameters = get_physio_file_parameters_dict(db, file.id)