def get_data_dir_path_config(env: Env) -> Path:
    """
    Look up the LORIS base data directory ('dataDirBasepath') in the in-database
    configuration, validate that it is an existing readable and writable directory,
    and return it as a path.
    """

    base_path = Path(_get_config_value(env, 'dataDirBasepath'))
    # Exits the program with an error if the directory is missing or inaccessible.
    check_loris_directory(env, base_path, "data")
    return base_path
def get_ephys_visualization_enabled_config(env: Env) -> bool:
    """
    Report whether the electrophysiology visualization components are enabled in the
    in-database configuration.
    """

    setting = _try_get_config_value(env, 'useEEGBrowserVisualizationComponents')
    # The setting is stored as a string; both 'true' and '1' mean enabled. A missing
    # setting (None) means disabled.
    return setting in ('true', '1')
- ), - ) - - if not os.access(eeg_chunks_path, os.R_OK) or not os.access(eeg_chunks_path, os.W_OK): - log_error_exit( - env, - f"Missing read or write permission on the LORIS EEG chunks directory '{eeg_chunks_path}'.", - ) - - return eeg_chunks_path + ephys_chunks_path = Path(ephys_chunks_path) + check_loris_directory(env, ephys_chunks_path, "electrophysiology chunks") + return ephys_chunks_path -def get_eeg_pre_package_download_dir_path_config(env: Env) -> Path | None: +def get_ephys_archive_dir_path_config(env: Env) -> Path | None: """ - Get the EEG pre-packaged download path configuration value from the in-database configuration. + Get the electrophysiology archive directory path configuration value from the in-database + configuration. """ - eeg_pre_package_path = _try_get_config_value(env, 'prePackagedDownloadPath') - if eeg_pre_package_path is None: + ephys_archive_dir_path = _try_get_config_value(env, 'prePackagedDownloadPath') + if ephys_archive_dir_path is None: return None - eeg_pre_package_path = Path(eeg_pre_package_path) - - if not eeg_pre_package_path.is_dir(): - log_error_exit( - env, - ( - "The configuration value for the LORIS EEG pre-packaged download directory path" - f" '{eeg_pre_package_path}' does not refer to an existing directory." - ), - ) - - if not os.access(eeg_pre_package_path, os.R_OK) or not os.access(eeg_pre_package_path, os.W_OK): - log_error_exit( - env, - ( - "Missing read or write permission on the LORIS EEG pre-packaged download directory" - f" '{eeg_pre_package_path}'." 
def check_loris_directory(env: Env, dir_path: Path, display_name: str) -> None:
    """
    Check that a LORIS directory exists and is readable and writable, or exit the
    program with an error otherwise.

    :param env          : The application environment, used for error logging.
    :param dir_path     : The directory path to check.
    :param display_name : Human-readable name of the directory (e.g. "data",
        "DICOM archive") used in the error messages.
    """

    if not dir_path.is_dir():
        log_error_exit(
            env,
            (
                f"The LORIS {display_name} directory path configuration value '{dir_path}' does not refer to an"
                " existing directory."
            ),
        )

    # `os.access` checks the effective permissions of the current process.
    if not os.access(dir_path, os.R_OK) or not os.access(dir_path, os.W_OK):
        log_error_exit(
            env,
            # "LORIS" prefix kept for consistency with the existence message above and
            # with the per-directory messages this helper replaced.
            f"Missing read or write permission on the LORIS {display_name} directory '{dir_path}'.",
        )
class DbPhysioCoordSystemElectrode(Base):
    """
    Association between a physiological coordinate system and an electrode
    ('physiological_coord_system_electrode_rel' link table).
    """

    __tablename__ = 'physiological_coord_system_electrode_rel'

    # Composite primary key: one row per (coordinate system, electrode) pair.
    coord_system_id : Mapped[int] = mapped_column('PhysiologicalCoordSystemID', primary_key=True)
    electrode_id : Mapped[int] = mapped_column('PhysiologicalElectrodeID', primary_key=True)
    # Physiological file the pairing belongs to.
    physio_file_id : Mapped[int] = mapped_column('PhysiologicalFileID')
    # Defaults to the row creation time.
    insert_time : Mapped[datetime] = mapped_column('InsertTime', default=datetime.now)
class DbPhysioElectrode(Base):
    """
    An electrode of a physiological recording ('physiological_electrode' table).
    """

    __tablename__ = 'physiological_electrode'

    id : Mapped[int] = mapped_column('PhysiologicalElectrodeID', primary_key=True)
    # Type and material are optional references into their respective lookup tables.
    type_id : Mapped[int | None] = mapped_column('PhysiologicalElectrodeTypeID', ForeignKey('physiological_electrode_type.PhysiologicalElectrodeTypeID'))
    material_id : Mapped[int | None] = mapped_column('PhysiologicalElectrodeMaterialID', ForeignKey('physiological_electrode_material.PhysiologicalElectrodeMaterialID'))
    name : Mapped[str] = mapped_column('Name')
    # 3D position of the electrode (required).
    point_3d_id : Mapped[int] = mapped_column('Point3DID', ForeignKey('point_3d.Point3DID'))
    # NOTE(review): the unit of the impedance is not visible here -- confirm (kOhm?).
    impedance : Mapped[int | None] = mapped_column('Impedance')
    # Path of the electrodes file this entry was read from, stored as a string.
    file_path : Mapped[Path | None] = mapped_column('FilePath', StringPath)

    type : Mapped['db_physio_electrode_type.DbPhysioElectrodeType | None'] = relationship('DbPhysioElectrodeType')
    material : Mapped['db_physio_electrode_material.DbPhysioElectrodeMaterial | None'] = relationship('DbPhysioElectrodeMaterial')
    point_3d : Mapped['db_point_3d.DbPoint3D'] = relationship('DbPoint3D')
class DbPhysioElectrodeType(Base):
    """
    A physiological electrode type ('physiological_electrode_type' lookup table).
    """

    __tablename__ = 'physiological_electrode_type'

    id : Mapped[int] = mapped_column('PhysiologicalElectrodeTypeID', primary_key=True)
    # Electrode type label as found in the BIDS electrodes file.
    name : Mapped[str] = mapped_column('ElectrodeType')
parent_id : Mapped[int | None] = mapped_column('ParentID') + path: Mapped[Path] = mapped_column('FilePath', StringPath) + """ + The path of this physiological file, which may be a directory (notably for MEG CTF data). The + path is relative to the LORIS data directory. + """ + + download_path: Mapped[Path] = mapped_column('DownloadPath', StringPath) + """ + The path from which to download this physiological file, which is guaranteed to be a normal + file or an archive. The path is relative to the LORIS data directory. + """ + head_shape_file_id: Mapped[int | None] = mapped_column('HeadShapeFileID', ForeignKey('meg_ctf_head_shape_file.ID')) """ ID of the head shape file associated to this file, which is only present for MEG CTF files. diff --git a/python/lib/db/models/physio_file_archive.py b/python/lib/db/models/physio_file_archive.py index b8f32ad10..2004cdfd0 100644 --- a/python/lib/db/models/physio_file_archive.py +++ b/python/lib/db/models/physio_file_archive.py @@ -16,6 +16,6 @@ class DbPhysioFileArchive(Base): physio_file_id : Mapped[int] = mapped_column('PhysiologicalFileID', ForeignKey('physiological_file.PhysiologicalFileID')) insert_time : Mapped[datetime] = mapped_column('InsertTime', default=datetime.now) blake2b_hash : Mapped[str] = mapped_column('Blake2bHash') - file_path : Mapped[Path] = mapped_column('FilePath', StringPath) + path : Mapped[Path] = mapped_column('FilePath', StringPath) physio_file: Mapped['db_physio_file.DbPhysioFile'] = relationship('DbPhysioFile') diff --git a/python/lib/db/models/point_3d.py b/python/lib/db/models/point_3d.py new file mode 100644 index 000000000..86d36d4c4 --- /dev/null +++ b/python/lib/db/models/point_3d.py @@ -0,0 +1,12 @@ +from sqlalchemy.orm import Mapped, mapped_column + +from lib.db.base import Base + + +class DbPoint3D(Base): + __tablename__ = 'point_3d' + + id : Mapped[int] = mapped_column('Point3DID', primary_key=True) + x : Mapped[float] = mapped_column('X') + y : Mapped[float] = mapped_column('Y') + z : 
def get_all_hed_schema_nodes(db: Database) -> Sequence[DbHedSchemaNode]:
    """
    Return every HED schema node stored in the database.
    """

    query = select(DbHedSchemaNode)
    # `Session.scalars(stmt)` is shorthand for `Session.execute(stmt).scalars()`.
    return db.scalars(query).all()
- :param bids_reader : dictionary with BIDS reader information - :type bids_reader : dict + :param bids_layout : PyBIDS layout :param bids_info : the BIDS data type information :param session : The LORIS session the EEG datasets are linked to :param db : Database class object :type db : object :param info : The BIDS import pipeline information :param dataset_tag_dict : Dict of dataset-inherited HED tags - :type dataset_tag_dict : dict :param dataset_type : raw | derivative. Type of the dataset - :type dataset_type : string """ self.env = env @@ -190,37 +183,30 @@ def register_data(self, derivatives=False, detect=True): ) # archive all files in a tar ball for downloading all files at once - files_to_archive: list[str] = [os.path.join(self.data_dir, eeg_file.path)] + files_to_archive: list[Path] = [self.data_dir / eeg_file.path] if eegjson_file_path: - files_to_archive.append(os.path.join(self.data_dir, eegjson_file_path)) + files_to_archive.append(self.data_dir / eegjson_file_path) + if channel_file_path: + files_to_archive.append(self.data_dir / channel_file_path) if fdt_file_path: - files_to_archive.append(os.path.join(self.data_dir, fdt_file_path)) + files_to_archive.append(self.data_dir / fdt_file_path) if electrode_file_path: - files_to_archive.append(os.path.join(self.data_dir, electrode_file_path)) + files_to_archive.append(self.data_dir / electrode_file_path) if event_file_paths: # archive all event files in a tar ball for event download - event_files_to_archive: list[str] = [] + event_files_to_archive: list[Path] = [] for event_file_path in event_file_paths: - files_to_archive.append(os.path.join(self.data_dir, event_file_path)) - event_files_to_archive.append(os.path.join(self.data_dir, event_file_path)) + files_to_archive.append(self.data_dir / event_file_path) + event_files_to_archive.append(self.data_dir / event_file_path) - event_archive_rel_name = os.path.splitext(event_file_paths[0])[0] + ".tgz" - self.create_and_insert_event_archive( - 
event_files_to_archive, event_archive_rel_name, eeg_file - ) + import_physio_event_archive(self.env, eeg_file, event_files_to_archive) - if channel_file_path: - files_to_archive.append(os.path.join(self.data_dir, channel_file_path)) - - archive_rel_name = os.path.splitext(eeg_file.path)[0] + ".tgz" - self.create_and_insert_archive( - files_to_archive, archive_rel_name, eeg_file - ) + import_physio_file_archive(self.env, eeg_file, files_to_archive) # create data chunks for React visualization - if get_eeg_viz_enabled_config(self.env): + if get_ephys_visualization_enabled_config(self.env): create_physio_channels_chunks(self.env, eeg_file) def fetch_and_insert_eeg_files(self, derivatives=False, detect=True): @@ -318,16 +304,8 @@ def fetch_and_insert_eeg_files(self, derivatives=False, detect=True): print(f"ERROR: {error}") sys.exit(lib.exitcode.PROGRAM_EXECUTION_FAILURE) - if self.info.loris_bids_path: - # copy the scans.tsv file to the LORIS BIDS import directory - scans_path = copy_scans_tsv_file_to_loris_bids_dir( - self.scans_file, - self.session, - self.info.data_dir_path / self.info.loris_bids_path, - self.data_dir, - ) - - eeg_file_data['scans_tsv_file'] = scans_path + # TODO: Better handle scans.tsv path (LORIS one instead of real one). + eeg_file_data['scans_tsv_file'] = self.scans_file.path scans_blake2 = compute_file_blake2b_hash(self.scans_file.path) eeg_file_data['physiological_scans_tsv_file_bake2hash'] = scans_blake2 @@ -694,126 +672,3 @@ def copy_file_to_loris_bids_dir(self, file, derivatives=False): copy_loris_bids_file(self.info, Path(file), loris_file_path) return loris_file_path - - def create_and_insert_archive(self, files_to_archive: list[str], archive_rel_name: str, eeg_file: DbPhysioFile): - """ - Create an archive with all electrophysiology files associated to a - specific recording (including electrodes.tsv, channels.tsv etc...) 
- :param files_to_archive: list of files to include in the archive - :param archive_rel_name: path to the archive relative to data_dir - :param eeg_file_id : PhysiologicalFileID - """ - - # load the Physiological object that will be used to insert the - # physiological archive into the database - physiological = Physiological(self.env, self.db, self.env.verbose) - - # check if archive is on the filesystem - (archive_rel_name, archive_full_path) = self.get_archive_paths(archive_rel_name) - if os.path.isfile(archive_full_path): - blake2 = compute_file_blake2b_hash(archive_full_path) - else: - blake2 = None - - # check if archive already inserted in database and matches the one - # on the filesystem using blake2b hash - if eeg_file.archive is not None: - if not blake2: - message = 'ERROR: no archive was found on the filesystem ' + \ - 'while an entry was found in the database for ' + \ - f'PhysiologicalFileID = {eeg_file.id}' - print(message) - exit(lib.exitcode.MISSING_FILES) - elif eeg_file.archive.blake2b_hash != blake2: - message = '\nERROR: blake2b hash of ' + archive_full_path +\ - ' does not match the one stored in the database.' 
+\ - '\nblake2b of ' + archive_full_path + ': ' + blake2 +\ - '\nblake2b in the database: ' + eeg_file.archive.blake2b_hash - print(message) - exit(lib.exitcode.CORRUPTED_FILE) - else: - return - - # create the archive directory if it does not exist - lib.utilities.create_dir( - os.path.dirname(archive_full_path), - self.env.verbose - ) - - # create the archive file - utilities.create_archive(files_to_archive, archive_full_path) - - # insert the archive file in physiological_archive - blake2 = compute_file_blake2b_hash(archive_full_path) - archive_info = { - 'PhysiologicalFileID': eeg_file.id, - 'Blake2bHash' : blake2, - 'FilePath' : archive_rel_name - } - physiological.insert_archive_file(archive_info) - - def create_and_insert_event_archive( - self, - files_to_archive: list[str], - archive_rel_name: str, - eeg_file: DbPhysioFile, - ): - """ - Create an archive with all event files associated to a specific recording - :param files_to_archive: list of files to include in the archive - :param archive_rel_name: path to the archive relative to data_dir - :param eeg_file : Physiological file object - """ - - # check if archive is on the filesystem - (archive_rel_name, archive_full_path) = self.get_archive_paths(archive_rel_name) - if os.path.isfile(archive_full_path): - blake2 = compute_file_blake2b_hash(archive_full_path) - else: - blake2 = None - - # check if archive already inserted in database and matches the one - # on the filesystem using blake2b hash - physiological_event_archive_obj = PhysiologicalEventArchive(self.db, self.env.verbose) - - if eeg_file.event_archive is not None: - if not blake2: - message = '\nERROR: no archive was found on the filesystem ' + \ - 'while an entry was found in the database for ' + \ - 'PhysiologicalFileID = ' + str(eeg_file.id) - print(message) - exit(lib.exitcode.MISSING_FILES) - elif eeg_file.event_archive.blake2b_hash != blake2: - message = '\nERROR: blake2b hash of ' + archive_full_path +\ - ' does not match the one stored in 
def create_mri_scan_type(env: Env, name: str) -> DbMriScanType:
    """
    Create a new MRI scan type with the given name, persist it in the database, and
    return the new record.
    """

    mri_scan_type = DbMriScanType(name=name)
    env.db.add(mri_scan_type)
    # Flush so the auto-generated ID is populated on the returned object.
    env.db.flush()
    return mri_scan_type
def import_physio_file_archive(env: Env, physio_file: DbPhysioFile, file_paths: list[Path]):
    """
    Create and import a physiological file archive into LORIS.

    :param env         : The application environment.
    :param physio_file : The physiological file record the archive belongs to.
    :param file_paths  : Paths of the files to include in the archive.

    :raises Exception: If an archive already exists at the target path.
    """

    archive_rel_path = get_archive_path(env, physio_file.path)

    data_dir_path = get_data_dir_path_config(env)
    archive_path = data_dir_path / archive_rel_path
    if archive_path.exists():
        raise Exception(f"Archive '{archive_rel_path}' already exists on the file system.")

    # Create all missing intermediate directories, not just the last component:
    # `mkdir(exist_ok=True)` alone raises FileNotFoundError when the grandparent
    # directory does not exist yet (the replaced helper created directories
    # recursively).
    archive_path.parent.mkdir(parents=True, exist_ok=True)

    create_archive_with_files(archive_path, file_paths)

    blake2b_hash = compute_file_blake2b_hash(archive_path)

    env.db.add(DbPhysioFileArchive(
        physio_file_id = physio_file.id,
        path = archive_rel_path,
        blake2b_hash = blake2b_hash,
    ))

    env.db.flush()
+ """ + + data_dir_path = get_data_dir_path_config(env) + archive_rel_path = remove_path_extension(file_paths[0].relative_to(data_dir_path)).with_suffix('.tgz') + + archive_path = data_dir_path / archive_rel_path + if archive_path.exists(): + raise Exception(f"Event archive '{archive_rel_path}' already exists on the file system.") + + create_archive_with_files(archive_path, file_paths) + + blake2b_hash = compute_file_blake2b_hash(archive_path) + + env.db.add(DbPhysioEventArchive( + physio_file_id = physio_file.id, + path = archive_rel_path, + blake2b_hash = blake2b_hash, + )) + + env.db.flush() + + +def get_archive_path(env: Env, file_path: Path) -> Path: + """ + Get the path of a physiological file archive relative to the LORIS data directory. + """ + + archive_rel_path = remove_path_extension(file_path).with_suffix('.tgz') + archive_dir_path = get_ephys_archive_dir_path_config(env) + if archive_dir_path is not None: + data_dir_path = get_data_dir_path_config(env) + return (archive_dir_path / 'raw' / archive_rel_path.name).relative_to(data_dir_path) + else: + return archive_rel_path diff --git a/python/lib/import_bids_dataset/args.py b/python/lib/import_bids_dataset/args.py new file mode 100644 index 000000000..b4d8f549d --- /dev/null +++ b/python/lib/import_bids_dataset/args.py @@ -0,0 +1,14 @@ +from dataclasses import dataclass +from pathlib import Path +from typing import Literal + + +@dataclass +class Args: + source_bids_path: Path + type: Literal[None, 'raw', 'derivative'] + bids_validation: bool + create_candidate: bool + create_session: bool + copy: bool + verbose: bool diff --git a/python/lib/import_bids_dataset/channels.py b/python/lib/import_bids_dataset/channels.py new file mode 100644 index 000000000..c52a32322 --- /dev/null +++ b/python/lib/import_bids_dataset/channels.py @@ -0,0 +1,119 @@ +from decimal import Decimal +from pathlib import Path + +from loris_bids_reader.eeg.channels import BidsEegChannelsTsvFile, BidsEegChannelTsvRow +from 
def insert_bids_channels_file(
    env: Env,
    import_env: BidsImportEnv,
    physio_file: DbPhysioFile,
    session: DbSession,
    acquisition: BidsAcquisitionInfo,
    channels_file: BidsEegChannelsTsvFile,
):
    """
    Insert the channels from a BIDS channels file into the database.

    :param env           : The application environment.
    :param import_env    : The BIDS import environment.
    :param physio_file   : The physiological file the channels belong to.
    :param session       : The LORIS session of the acquisition.
    :param acquisition   : The BIDS acquisition information.
    :param channels_file : The parsed BIDS channels TSV file.
    """

    loris_channels_file_path = get_loris_bids_file_path(
        import_env, session, acquisition.data_type, channels_file.path
    )

    group_errors(
        f"Could not import channels from file '{channels_file.path.name}'.",
        (
            # Bind `channel` as a default argument so each thunk captures the row it
            # was created for. A plain closure late-binds the generator variable, so
            # every thunk would see the last row if `group_errors` collects the
            # thunks before calling them.
            lambda channel=channel: insert_bids_channel(
                env,
                import_env,
                physio_file,
                loris_channels_file_path,
                channel,
            ) for channel in channels_file.rows
        ),
    )
+ """ + + channel_type, status_type = group_errors_tuple( + f"Could not import channel '{channel.data['name']}'.", + lambda: get_bids_physio_channel_type(env, import_env, channel), + lambda: get_bids_physio_status_type(env, import_env, channel), + ) + + insert_physio_channel( + env, + physio_file, + channel_type, + status_type, + loris_channels_file_path, + channel.data['name'], + channel.data['description'], + int(channel.data['sampling_frequency']) if channel.data['sampling_frequency'] is not None else None, + Decimal(channel.data['low_cutoff']) if channel.data['low_cutoff'] is not None else None, + Decimal(channel.data['high_cutoff']) if channel.data['high_cutoff'] is not None else None, + int(channel.data['notch']) if isinstance(channel.data['notch'], float) else None, + channel.data['status_description'], + channel.data['units'], + ) + + +def get_bids_physio_channel_type( + env: Env, + import_env: BidsImportEnv, + channel: BidsEegChannelTsvRow, +) -> DbPhysioChannelType: + """ + Get a physiological channel type from the database using a BIDS channel TSV row, or raise an + exception if that channel type is not found in the database. + """ + + channel_type = try_get_channel_type_with_name(env.db, channel.data['type']) + if channel_type is not None: + return channel_type + + import_env.register_unknown_physio_channel_type(channel.data['type']) + raise Exception(f"Unknown channel type '{channel.data['type']}'.") + + +def get_bids_physio_status_type( + env: Env, + import_env: BidsImportEnv, + channel: BidsEegChannelTsvRow, +) -> DbPhysioStatusType | None: + """ + Get a physiological status type from the database using a BIDS channel TSV row, or raise an + exception if that status type is not found in the database. 
+ """ + + if channel.data['status'] is None: + return None + + status_type = try_get_status_type_with_name(env.db, channel.data['status']) + if status_type is not None: + return status_type + + import_env.register_unknown_physio_status_type(channel.data['status']) + raise Exception(f"Unknown channel status type '{channel.data['status']}'.") diff --git a/python/lib/import_bids_dataset/copy_files.py b/python/lib/import_bids_dataset/copy_files.py index ae68ef9c8..d641eadb3 100644 --- a/python/lib/import_bids_dataset/copy_files.py +++ b/python/lib/import_bids_dataset/copy_files.py @@ -1,13 +1,46 @@ -import os import re import shutil from pathlib import Path +from loris_bids_reader.files.participants import BidsParticipantsTsvFile from loris_bids_reader.files.scans import BidsScansTsvFile +from loris_bids_reader.reader import BidsDatasetReader -import lib.utilities +from lib.config import get_data_dir_path_config from lib.db.models.session import DbSession +from lib.env import Env from lib.import_bids_dataset.env import BidsImportEnv +from lib.logging import log_error_exit + + +def get_loris_bids_dataset_path(env: Env, bids: BidsDatasetReader) -> Path: + """ + Get the LORIS BIDS directory path for the BIDS dataset to import, and create that directory if + it does not exist yet. + """ + + try: + dataset_description = bids.dataset_description_file + except Exception as error: + log_error_exit(env, str(error)) + + if dataset_description is None: + log_error_exit( + env, + "No file 'dataset_description.json' found in the input BIDS dataset.", + ) + + # Sanitize the dataset metadata to have a usable name for the directory. 
+ dataset_name = re.sub(r'[^0-9a-zA-Z]+', '_', dataset_description.data['Name']) + dataset_version = re.sub(r'[^0-9a-zA-Z\.]+', '_', dataset_description.data['BIDSVersion']) + + data_dir_path = get_data_dir_path_config(env) + loris_bids_path = data_dir_path / 'bids_imports' / f'{dataset_name}_BIDSVersion_{dataset_version}' + + if not loris_bids_path.exists(): + loris_bids_path.mkdir() + + return loris_bids_path def get_loris_bids_file_path( @@ -78,31 +111,37 @@ def copy_loris_bids_file(import_env: BidsImportEnv, file_path: Path, loris_file_ shutil.copytree(file_path, full_loris_file_path) -# TODO: This function is ugly and should be replaced. -def copy_scans_tsv_file_to_loris_bids_dir( - scans_file: BidsScansTsvFile, - session: DbSession, - loris_bids_root_dir: Path, - data_dir: Path, -) -> str: +def copy_static_dataset_files(source_bids_path: Path, loris_bids_path: Path): """ - Copy the scans.tsv file to the LORIS BIDS directory for the subject. + Copy the static files of the source BIDS dataset to the LORIS BIDS dataset. """ - original_file_path = scans_file.path - loris_file_name = get_loris_bids_file_name(scans_file.path.name, session) - final_file_path = ( - loris_bids_root_dir - / f'sub-{session.candidate.psc_id}' - / f'ses-{session.visit_label}' - / loris_file_name - ) + for file_name in ['README', 'dataset_description.json']: + source_file_path = source_bids_path / file_name + if not source_file_path.is_file(): + continue + + loris_file_path = loris_bids_path / file_name + shutil.copyfile(source_file_path, loris_file_path) + + +def copy_bids_tsv_participants(tsv_participants: BidsParticipantsTsvFile, loris_participants_tsv_path: Path): + """ + Copy some participants.tsv rows into the LORIS participants.tsv file, creating it if necessary. 
+ """ + + if loris_participants_tsv_path.exists(): + tsv_participants.merge(BidsParticipantsTsvFile(loris_participants_tsv_path)) + + tsv_participants.write(loris_participants_tsv_path, ['participant_id']) + + +def copy_bids_tsv_scans(tsv_scans: BidsScansTsvFile, loris_scans_tsv_path: Path): + """ + Copy some scans.tsv rows into a LORIS scans.tsv file, creating it if necessary. + """ - # copy the scans.tsv file to the new directory - if os.path.exists(final_file_path): - lib.utilities.append_to_tsv_file(original_file_path, final_file_path, 'filename', False) # type: ignore - else: - lib.utilities.copy_file(original_file_path, final_file_path, False) # type: ignore + if loris_scans_tsv_path.exists(): + tsv_scans.merge(BidsScansTsvFile(loris_scans_tsv_path)) - # determine the relative path and return it - return os.path.relpath(final_file_path, data_dir) + tsv_scans.write(loris_scans_tsv_path, ['filename', 'acq_time', 'age_at_scan']) diff --git a/python/lib/import_bids_dataset/env.py b/python/lib/import_bids_dataset/env.py index 62c5469a5..3c209f992 100644 --- a/python/lib/import_bids_dataset/env.py +++ b/python/lib/import_bids_dataset/env.py @@ -22,3 +22,62 @@ class BidsImportEnv: """ The LORIS BIDS directory path for this import, relative to the LORIS data directory. """ + + total_files_count : int + unknown_mri_scan_types : list[str] + unknown_physio_channel_types : list[str] + unknown_physio_status_types : list[str] + + imported_acquisitions_count: int = 0 + """ + The number of succesfully imported BIDS acquisitions. + """ + + ignored_acquisitions_count: int = 0 + """ + The number of ignored BIDS acquisition imports. + """ + + failed_acquisitions_count: int = 0 + """ + The number of failed BIDS acquisition imports. 
+ """ + + def __init__(self, data_dir_path: Path, loris_bids_path: Path | None, total_files_count: int): + self.data_dir_path = data_dir_path + self.loris_bids_path = loris_bids_path + self.total_files_count = total_files_count + self.imported_files_count = 0 + self.ignored_files_count = 0 + self.failed_files_count = 0 + self.unknown_mri_scan_types = [] + self.unknown_physio_channel_types = [] + self.unknown_physio_status_types = [] + + @property + def processed_files_count(self) -> int: + return self.imported_files_count + self.ignored_files_count + self.failed_files_count + + def register_unknown_mri_scan_type(self, scan_type: str): + """ + Register an unknown MRI scan type. + """ + + if scan_type not in self.unknown_physio_channel_types: + self.unknown_physio_channel_types.append(scan_type) + + def register_unknown_physio_channel_type(self, channel_type: str): + """ + Register an unknown physiological channel type. + """ + + if channel_type not in self.unknown_physio_channel_types: + self.unknown_physio_channel_types.append(channel_type) + + def register_unknown_physio_status_type(self, status_type: str): + """ + Register an unknown physiological status type. 
+ """ + + if status_type not in self.unknown_physio_status_types: + self.unknown_physio_status_types.append(status_type) diff --git a/python/lib/import_bids_dataset/events.py b/python/lib/import_bids_dataset/events.py new file mode 100644 index 000000000..339e961be --- /dev/null +++ b/python/lib/import_bids_dataset/events.py @@ -0,0 +1,111 @@ +import shutil +from pathlib import Path +from typing import Any + +from loris_bids_reader.json import BidsJsonFile +from loris_bids_reader.reader import BidsDatasetReader +from loris_utils.crypto import compute_file_blake2b_hash + +from lib.db.models.physio_event_file import DbPhysioEventFile +from lib.env import Env +from lib.import_bids_dataset.args import Args +from lib.logging import log_warning +from lib.physio.events import DatasetSource, EventFileSource +from lib.physio.hed import TagGroupMember, build_hed_tag_groups, insert_hed_tag_group +from lib.physio.parameters import insert_physio_file_parameter + + +def get_root_events_metadata( + env: Env, + args: Args, + bids: BidsDatasetReader, + loris_bids_path: Path | None, + project_id: int, +) -> dict[str, dict[str, list[TagGroupMember]]]: + """ + Get the root level 'events.json' data, assuming a singe project for the BIDS dataset. + """ + + events_dict_file = bids.events_dict_file + + if events_dict_file is None: + log_warning(env, "No event metadata files (events.json) in the BIDS root directory.") + return {} + + # Copy the event file to the LORIS BIDS import directory. 
+
+    if loris_bids_path is not None:
+        events_metadata_rel_path = events_dict_file.path.relative_to(bids.path)
+        events_metadata_path = loris_bids_path / events_metadata_rel_path
+        shutil.copyfile(events_dict_file.path, events_metadata_path)
+    else:
+        events_metadata_path = events_dict_file.path
+
+    _, dataset_tag_dict = insert_events_metadata_file(env, DatasetSource(project_id), events_dict_file)
+
+    return dataset_tag_dict
+
+
+def insert_events_metadata_file(
+    env: Env,
+    source: EventFileSource,
+    events_dictionary_file: BidsJsonFile,
+):
+    """
+    Inserts the events metadata information read from the file *events.json
+    into the physiological_event_file, physiological_event_parameter
+    and physiological_event_parameter_category_level tables, linking it to the
+    physiological file ID already inserted in physiological_file.
+    """
+
+    event_file = DbPhysioEventFile(
+        physio_file_id = source.physio_file_id,
+        project_id = source.project_id,
+        type = 'json',
+        path = events_dictionary_file.path,
+    )
+
+    env.db.add(event_file)
+    env.db.flush()
+
+    tag_dict: dict[str, dict[str, list[TagGroupMember]]] = {}
+    for event_name, event in events_dictionary_file.data.items():
+        tag_dict[event_name] = parse_event_description(env, source, event_name, event)
+
+    if source.physio_file is not None:
+        # get the blake2b hash of the task events file
+        blake2 = compute_file_blake2b_hash(events_dictionary_file.path)
+
+        # insert blake2b hash of task event file into physiological_parameter_file
+        insert_physio_file_parameter(env, source.physio_file, 'event_file_json_blake2b_hash', blake2)
+        env.db.flush()
+
+    return event_file.id, tag_dict
+
+
+def parse_event_description(
+    env: Env,
+    source: EventFileSource,
+    event_name: str,
+    event: Any,
+) -> dict[str, list[TagGroupMember]]:
+    """
+    Parse and insert the HED tags of an event dictionary file.
+ """ + + if event['Levels'] is None: + return {} + + tag_dict: dict[str, list[TagGroupMember]] = {} + for level_name, level in event['Levels'].items(): + tag_dict[level_name] = [] + level_hed = event['HED'][level_name] \ + if isinstance(event['HED'], dict) and level in event['HED'] \ + else None + + if level_hed is not None: + tag_groups = build_hed_tag_groups(env, level_hed) + insert_hed_tag_group(env, source, tag_groups, event_name, level_name, str(level)) + tag_dict[level_name] = tag_groups + + return tag_dict diff --git a/python/lib/import_bids_dataset/events_tsv.py b/python/lib/import_bids_dataset/events_tsv.py new file mode 100644 index 000000000..941e12382 --- /dev/null +++ b/python/lib/import_bids_dataset/events_tsv.py @@ -0,0 +1,71 @@ +from datetime import datetime, timedelta +from decimal import Decimal + +from loris_bids_reader.files.events import BidsEventsTsvFile +from loris_bids_reader.info import BidsAcquisitionInfo + +from lib.db.models.physio_file import DbPhysioFile +from lib.db.models.session import DbSession +from lib.env import Env +from lib.import_bids_dataset.copy_files import get_loris_bids_file_path +from lib.import_bids_dataset.env import BidsImportEnv +from lib.physio.events import insert_physio_event_task, insert_physio_events_file + + +def insert_bids_events_file( + env: Env, + import_env: BidsImportEnv, + physio_file: DbPhysioFile, + session: DbSession, + acquisition: BidsAcquisitionInfo, + events_file: BidsEventsTsvFile, + # blake2, + # dataset_tag_dict, + # file_tag_dict, + # hed_union, +): + """ + Inserts the event information read from the file *events.tsv + into the physiological_task_event table, linking it to the + physiological file ID already inserted in physiological_file. + Only called in `eeg.py`. 
+ + :param event_data : list with dictionaries of events + information to insert into + physiological_task_event + :type event_data : list + :param event_file : name of the event file + :type event_file : str + :param physiological_file_id: PhysiologicalFileID to link the event info to + :type physiological_file_id: int + :param project_id : ProjectID to link the event info to + :type project_id : int + :param blake2 : blake2b hash of the task event file + :type blake2 : str + :param dataset_tag_dict : Dict of dataset-inherited HED tags + :type dataset_tag_dict : dict + :param file_tag_dict : Dict of subject-inherited HED tags + :type file_tag_dict : dict + :param hed_union : Union of HED schemas + :type hed_union : any + """ + + loris_events_file_path = get_loris_bids_file_path(import_env, session, acquisition.data_type, events_file.path) + physio_events_file = insert_physio_events_file(env, physio_file, loris_events_file_path) + + for row in events_file.rows: + insert_physio_event_task( + env, + physio_file, + physio_events_file, + Decimal(row.data['onset']), + Decimal(row.data['duration']), + row.data['trial_type'], + ( + (datetime(1, 1, 1) + timedelta(seconds=row.data['response_time'])).time() + if row.data['response_time'] is not None + else None + ), + ) + + # TODO: Handle HED. 
diff --git a/python/lib/import_bids_dataset/main.py b/python/lib/import_bids_dataset/main.py new file mode 100644 index 000000000..5d631ce67 --- /dev/null +++ b/python/lib/import_bids_dataset/main.py @@ -0,0 +1,221 @@ +from typing import Any + +from loris_bids_reader.meg.reader import BidsMegDataTypeReader +from loris_bids_reader.mri.reader import BidsMriDataTypeReader +from loris_bids_reader.reader import BidsDatasetReader, BidsDataTypeReader, BidsSessionReader +from loris_utils.iter import count + +from lib.config import get_data_dir_path_config, get_default_bids_visit_label_config +from lib.database import Database +from lib.db.models.session import DbSession +from lib.db.queries.candidate import try_get_candidate_with_psc_id +from lib.db.queries.session import try_get_session_with_cand_id_visit_label +from lib.eeg import Eeg +from lib.env import Env +from lib.import_bids_dataset.args import Args +from lib.import_bids_dataset.check_sessions import check_or_create_bids_sessions +from lib.import_bids_dataset.check_subjects import check_or_create_bids_subjects +from lib.import_bids_dataset.copy_files import ( + copy_bids_tsv_participants, + copy_bids_tsv_scans, + copy_static_dataset_files, + get_loris_bids_dataset_path, +) +from lib.import_bids_dataset.env import BidsImportEnv +from lib.import_bids_dataset.events import get_root_events_metadata +from lib.import_bids_dataset.meg.ctf import import_bids_meg_data_type +from lib.import_bids_dataset.mri import import_bids_mri_data_type +from lib.import_bids_dataset.print import print_bids_import_summary +from lib.logging import log, log_error_exit, log_warning + + +def import_bids_dataset(env: Env, args: Args, legacy_db: Database): + """ + Read the provided BIDS dataset and import it into LORIS. + """ + + data_dir_path = get_data_dir_path_config(env) + + log(env, "Parsing BIDS dataset...") + + bids = BidsDatasetReader(args.source_bids_path, args.bids_validation) + + # TODO: Not the exact count. 
+ acquisitions_count = count(bids.data_types) + + log(env, f"Found {acquisitions_count} acquisitions.") + + log(env, f"Found {len(bids.subject_labels)} subjects:") + for subject_label in bids.subject_labels: + log(env, f"- {subject_label}") + + log(env, f"Found {len(bids.session_labels)} sessions:") + for session_label in bids.session_labels: + log(env, f"- {session_label}") + + # Check the BIDS subject and session labels and create their candidates and sessions in LORIS + # if needed. + + check_or_create_bids_subjects( + env, + [subject.info for subject in bids.subjects], + args.create_candidate, + ) + + sessions = check_or_create_bids_sessions( + env, + [session.info for session in bids.sessions], + args.create_session, + ) + + project_id = sessions[0].project.id + + env.db.commit() + + # Get the LORIS BIDS import directory path and create the directory if needed. + + if args.copy: + loris_bids_path = get_loris_bids_dataset_path(env, bids) + else: + loris_bids_path = None + + # Get the BIDS events metadata. + + events_metadata = get_root_events_metadata(env, args, bids, loris_bids_path, project_id) + + # Copy the `participants.tsv` file rows. + + if loris_bids_path is not None and bids.participants_file is not None: + loris_participants_tsv_path = loris_bids_path / 'participants.tsv' + copy_bids_tsv_participants(bids.participants_file, loris_participants_tsv_path) + + # Process each session directory. + + import_env = BidsImportEnv( + data_dir_path = data_dir_path, + loris_bids_path = loris_bids_path.relative_to(data_dir_path) if loris_bids_path is not None else None, + total_files_count = acquisitions_count, + ) + + for bids_session in bids.sessions: + import_bids_session(env, import_env, args, bids_session, events_metadata, legacy_db) + + # Copy the static BIDS files. + + if loris_bids_path is not None: + copy_static_dataset_files(bids.path, loris_bids_path) + + # Print import summary. 
+ + print_bids_import_summary(env, import_env) + + +def import_bids_session( + env: Env, + import_env: BidsImportEnv, + args: Args, + bids_session: BidsSessionReader, + events_metadata: dict[Any, Any], + legacy_db: Database, +): + """ + Read the provided BIDS session directory and import it into LORIS. + """ + + log(env, f"Importing files for subject '{bids_session.subject.label}' and session '{bids_session.label}'.") + + candidate = try_get_candidate_with_psc_id(env.db, bids_session.subject.label) + if candidate is None: + # This should not happen as BIDS subject labels should have been checked previously. + log_error_exit(env, f"Candidate not found for PSCID '{bids_session.subject.label}'.") + + if bids_session.label is not None: + visit_label = bids_session.label + else: + visit_label = get_default_bids_visit_label_config(env) + if visit_label is None: + log_error_exit( + env, + "Missing BIDS session in the dataset or default BIDS visit label in the LORIS configuration.", + ) + + session = try_get_session_with_cand_id_visit_label(env.db, candidate.cand_id, visit_label) + if session is None: + # This should not happen as BIDS session labels should have been checked previously. + log_error_exit(env, f"Visit not found for visit label '{visit_label}'.") + + try: + # Read the scans.tsv property to raise an exception if the file is incorrect. + scans_file = bids_session.scans_file + + if import_env.loris_bids_path is not None and scans_file is not None: + loris_scans_tsv_path = ( + import_env.loris_bids_path + / f'sub-{bids_session.subject.label}' + / f'ses-{bids_session.label}' + / f'sub-{bids_session.subject.label}_ses-{bids_session.label}_scans.tsv' + ) + + copy_bids_tsv_scans(scans_file, loris_scans_tsv_path) + except Exception as exception: + log_warning( + env, + f"Error while reading the session scans.tsv file, scans.tsv data will be ignored. Full error:\n{exception}" + ) + + # Process each data type directory. 
+ + for data_type in bids_session.data_types: + import_bids_data_type(env, import_env, args, session, data_type, events_metadata, legacy_db) + + +def import_bids_data_type( + env: Env, + import_env: BidsImportEnv, + args: Args, + session: DbSession, + data_type: BidsDataTypeReader, + events_metadata: dict[Any, Any], + legacy_db: Database, +): + """ + Read the provided BIDS data type directory and import it into LORIS. + """ + + log(env, f"Importing data type {data_type.name}") + + if data_type.session.scans_file is None: + log_warning(env, "No 'scans.tsv' file found, 'scans.tsv' data will be ignored.") + + match data_type: + case BidsMriDataTypeReader(): + import_bids_mri_data_type(env, import_env, session, data_type) + case BidsMegDataTypeReader(): + import_bids_meg_data_type(env, import_env, args, session, data_type) + case BidsDataTypeReader(): + import_bids_eeg_data_type_files(env, import_env, args, session, data_type, events_metadata, legacy_db) + + +def import_bids_eeg_data_type_files( + env: Env, + import_env: BidsImportEnv, + args: Args, + session: DbSession, + data_type: BidsDataTypeReader, + events_metadata: dict[Any, Any], + legacy_db: Database, +): + """ + Read the provided BIDS EEG data type directory and import it into LORIS. 
+ """ + + Eeg( + env = env, + import_env = import_env, + bids_layout = data_type.session.subject.dataset.layout, + bids_info = data_type.info, + db = legacy_db, + session = session, + dataset_tag_dict = events_metadata, + dataset_type = args.type, + ) diff --git a/python/lib/import_bids_dataset/meg/ctf.py b/python/lib/import_bids_dataset/meg/ctf.py new file mode 100644 index 000000000..1d0b9b4d2 --- /dev/null +++ b/python/lib/import_bids_dataset/meg/ctf.py @@ -0,0 +1,189 @@ +from pathlib import Path + +from loris_bids_reader.info import BidsAcquisitionInfo +from loris_bids_reader.meg.acquisition import MegAcquisition +from loris_bids_reader.meg.reader import BidsMegDataTypeReader +from loris_utils.archive import create_archive_with_file +from loris_utils.error import group_errors_tuple +from loris_utils.path import add_path_extension + +from lib.config import ( + get_data_dir_path_config, + get_ephys_archive_dir_path_config, + get_ephys_visualization_enabled_config, +) +from lib.db.models.meg_ctf_head_shape_file import DbMegCtfHeadShapeFile +from lib.db.models.session import DbSession +from lib.db.queries.physio_file import try_get_physio_file_with_path +from lib.env import Env +from lib.import_bids_dataset.acquisitions import import_bids_acquisitions +from lib.import_bids_dataset.args import Args +from lib.import_bids_dataset.channels import insert_bids_channels_file +from lib.import_bids_dataset.copy_files import copy_loris_bids_file, get_loris_bids_file_path +from lib.import_bids_dataset.env import BidsImportEnv +from lib.import_bids_dataset.events import insert_events_metadata_file +from lib.import_bids_dataset.events_tsv import insert_bids_events_file +from lib.import_bids_dataset.file_type import get_check_bids_imaging_file_type +from lib.import_bids_dataset.meg.ctf_head_shape import insert_head_shape_file +from lib.import_bids_dataset.physio import ( + get_check_bids_physio_file_hash, + get_check_bids_physio_modality, + get_check_bids_physio_output_type, +) 
+from lib.logging import log, log_warning +from lib.physio.chunking import create_physio_channels_chunks +from lib.physio.events import FileSource +from lib.physio.file import insert_physio_file +from lib.physio.parameters import insert_physio_file_parameter + + +def import_bids_meg_data_type( + env: Env, + import_env: BidsImportEnv, + args: Args, + session: DbSession, + data_type: BidsMegDataTypeReader, +): + if data_type.head_shape_file is not None: + head_shape_file_path = get_loris_bids_file_path( + import_env, + session, + data_type.name, + data_type.head_shape_file.path, + ) + + head_shape_file = insert_head_shape_file(env, data_type.head_shape_file, head_shape_file_path) + copy_loris_bids_file(import_env, data_type.head_shape_file.path, head_shape_file_path) + else: + head_shape_file = None + + import_bids_acquisitions( + env, + import_env, + data_type.acquisitions, + lambda acquisition, bids_info: import_bids_meg_acquisition( + env, + import_env, + args, + session, + acquisition, + bids_info, + head_shape_file, + ), + ) + + +def import_bids_meg_acquisition( + env: Env, + import_env: BidsImportEnv, + args: Args, + session: DbSession, + acquisition: MegAcquisition, + bids_info: BidsAcquisitionInfo, + head_shape_file: DbMegCtfHeadShapeFile | None, +): + modality, output_type, file_type, file_hash = group_errors_tuple( + f"Error while checking database information for MEG acquisition '{bids_info.name}'.", + lambda: get_check_bids_physio_modality(env, bids_info.data_type), + lambda: get_check_bids_physio_output_type(env, args.type or 'raw'), + lambda: get_check_bids_imaging_file_type(env, 'ctf'), + lambda: get_check_bids_physio_file_hash(env, acquisition.ctf_path), + ) + + # The files to copy to LORIS, with the source path on the left and the LORIS path on the right. 
+ files_to_copy: list[tuple[Path, Path]] = [] + + loris_file_path = get_loris_bids_file_path(import_env, session, bids_info.data_type, acquisition.ctf_path) + files_to_copy.append((acquisition.ctf_path, loris_file_path)) + + loris_file = try_get_physio_file_with_path(env.db, loris_file_path) + if loris_file is not None: + log(env, f"File '{loris_file_path}' is already registered in LORIS. Skipping.") + import_env.ignored_files_count += 1 + return + + check_bids_meg_metadata_files(env, acquisition, bids_info) + + ctf_archive_path = get_ctf_archive_path(env, loris_file_path) + create_archive_with_file(import_env.data_dir_path / ctf_archive_path, acquisition.ctf_path) + + physio_file = insert_physio_file( + env, + session, + loris_file_path, + file_type, + modality, + output_type, + bids_info.scan_row.get_acquisition_time() if bids_info.scan_row is not None else None, + ctf_archive_path, + head_shape_file, + ) + + insert_physio_file_parameter(env, physio_file, 'physiological_file_blake2b_hash', file_hash) + for name, value in acquisition.sidecar_file.data.items(): + insert_physio_file_parameter(env, physio_file, name, value) + + if acquisition.events_file is not None: + insert_bids_events_file(env, import_env, physio_file, session, bids_info, acquisition.events_file) + loris_events_file_path = get_loris_bids_file_path( + import_env, session, bids_info.data_type, acquisition.events_file.path + ) + files_to_copy.append((acquisition.events_file.path, loris_events_file_path)) + if acquisition.events_file.dictionary is not None: + insert_events_metadata_file(env, FileSource(physio_file), acquisition.events_file.dictionary) + loris_events_dictionary_file_path = get_loris_bids_file_path( + import_env, session, bids_info.data_type, acquisition.events_file.dictionary.path + ) + files_to_copy.append((acquisition.events_file.dictionary.path, loris_events_dictionary_file_path)) + + if acquisition.channels_file is not None: + insert_bids_channels_file(env, import_env, physio_file, 
session, bids_info, acquisition.channels_file)
+        loris_channels_file_path = get_loris_bids_file_path(
+            import_env, session, bids_info.data_type, acquisition.channels_file.path
+        )
+        files_to_copy.append((acquisition.channels_file.path, loris_channels_file_path))
+
+    for source_path, destination_path in files_to_copy:
+        copy_loris_bids_file(import_env, source_path, destination_path)
+
+    env.db.commit()
+
+    log(env, f"MEG file succesfully imported with ID: {physio_file.id}.")
+
+    if get_ephys_visualization_enabled_config(env):
+        log(env, "Creating visualization chunks...")
+        create_physio_channels_chunks(env, physio_file)
+
+    env.db.commit()
+
+
+def check_bids_meg_metadata_files(env: Env, acquisition: MegAcquisition, bids_info: BidsAcquisitionInfo):
+    """
+    Check for the presence of BIDS metadata files for the BIDS MEG acquisition and warn the user if
+    that is not the case.
+    """
+
+    if acquisition.channels_file is None:
+        log_warning(env, f"No channels file found for acquisition '{bids_info.name}'.")
+
+    if acquisition.events_file is None:
+        log_warning(env, f"No events file found for acquisition '{bids_info.name}'.")
+
+    if acquisition.events_file is not None and acquisition.events_file.dictionary is None:
+        log_warning(env, f"No events dictionary file found for acquisition '{bids_info.name}'.")
+
+
+def get_ctf_archive_path(env: Env, loris_ctf_path: Path) -> Path:
+    """
+    Get the path of a CTF archive.
+ """ + + archive_rel_path = add_path_extension(loris_ctf_path, 'tgz') + archive_dir_path = get_ephys_archive_dir_path_config(env) + if archive_dir_path is not None: + data_dir_path = get_data_dir_path_config(env) + archive_path = archive_dir_path / 'ctf' / archive_rel_path.name + archive_path.parent.mkdir(exist_ok=True, parents=True) + return (archive_path).relative_to(data_dir_path) + else: + return archive_rel_path diff --git a/python/lib/import_bids_dataset/meg/ctf_head_shape.py b/python/lib/import_bids_dataset/meg/ctf_head_shape.py new file mode 100644 index 000000000..ef043a3ef --- /dev/null +++ b/python/lib/import_bids_dataset/meg/ctf_head_shape.py @@ -0,0 +1,40 @@ +from pathlib import Path + +from loris_bids_reader.meg.head_shape import MegCtfHeadShapeFile +from loris_utils.crypto import compute_file_blake2b_hash + +from lib.db.models.meg_ctf_head_shape_file import DbMegCtfHeadShapeFile +from lib.db.models.meg_ctf_head_shape_point import DbMegCtfHeadShapePoint +from lib.env import Env + + +def insert_head_shape_file( + env: Env, + head_shape_file: MegCtfHeadShapeFile, + loris_head_shape_file_path: Path, +) -> DbMegCtfHeadShapeFile: + """ + Insert a MEG CTF head shape file into the LORIS database. 
+ """ + + blake2b_hash = compute_file_blake2b_hash(head_shape_file.path) + + db_head_shape_file = DbMegCtfHeadShapeFile( + path = loris_head_shape_file_path, + blake2b_hash = blake2b_hash, + ) + + env.db.add(db_head_shape_file) + env.db.flush() + + for name, point in head_shape_file.points.items(): + env.db.add(DbMegCtfHeadShapePoint( + file_id = db_head_shape_file.id, + name = name, + x = point.x, + y = point.y, + z = point.z, + )) + + env.db.flush() + return db_head_shape_file diff --git a/python/lib/import_bids_dataset/mri.py b/python/lib/import_bids_dataset/mri.py new file mode 100644 index 000000000..0ce87e19c --- /dev/null +++ b/python/lib/import_bids_dataset/mri.py @@ -0,0 +1,218 @@ +from pathlib import Path +from typing import Any + +from loris_bids_reader.info import BidsAcquisitionInfo +from loris_bids_reader.mri.acquisition import MriAcquisition +from loris_bids_reader.mri.reader import BidsMriDataTypeReader +from loris_utils.crypto import compute_file_blake2b_hash +from loris_utils.error import group_errors_tuple + +from lib.db.models.mri_scan_type import DbMriScanType +from lib.db.models.session import DbSession +from lib.db.queries.file import try_get_file_with_hash, try_get_file_with_path +from lib.db.queries.mri_scan_type import try_get_mri_scan_type_with_name +from lib.env import Env +from lib.imaging_lib.file import register_mri_file +from lib.imaging_lib.file_parameter import register_mri_file_parameter, register_mri_file_parameters +from lib.imaging_lib.nifti import add_nifti_spatial_file_parameters +from lib.imaging_lib.nifti_pic import create_nifti_preview_picture +from lib.imaging_lib.scan_type import create_mri_scan_type +from lib.import_bids_dataset.acquisitions import import_bids_acquisitions +from lib.import_bids_dataset.copy_files import copy_loris_bids_file, get_loris_bids_file_path +from lib.import_bids_dataset.env import BidsImportEnv +from lib.import_bids_dataset.file_type import get_check_bids_imaging_file_type_from_extension +from 
lib.import_bids_dataset.mri_sidecar import add_bids_mri_sidecar_file_parameters +from lib.import_bids_dataset.scans import add_bids_scans_file_parameters +from lib.logging import log + +KNOWN_SUFFIXES_PER_MRI_DATA_TYPE = { + 'anat': [ + 'T1w', 'T2w', 'T1rho', 'T1map', 'T2map', 'T2star', 'FLAIR', 'FLASH', 'PD', 'PDmap', 'PDT2', + 'inplaneT1', 'inplaneT2', 'angio', + ], + 'func': [ + 'bold', 'cbv', 'phase', + ], + 'dwi': [ + 'dwi', 'sbref', + ], + 'fmap': [ + 'phasediff', 'magnitude1', 'magnitude2', 'phase1', 'phase2', 'fieldmap', 'epi', + ], +} + + +def import_bids_mri_data_type( + env: Env, + import_env: BidsImportEnv, + session: DbSession, + data_type: BidsMriDataTypeReader, +): + """ + Import the MRI acquisitions found in a BIDS MRI data type directory. + """ + + import_bids_acquisitions( + env, + import_env, + data_type.acquisitions, + lambda acquisition, bids_info: import_bids_mri_acquisition( + env, + import_env, + session, + acquisition, + bids_info, + ), + ) + + +def import_bids_mri_acquisition( + env: Env, + import_env: BidsImportEnv, + session: DbSession, + acquisition: MriAcquisition, + bids_info: BidsAcquisitionInfo, +): + """ + Import a BIDS NIfTI file and its associated files in LORIS. + """ + + # The files to copy to LORIS, with the source path on the left and the LORIS path on the right. + files_to_copy: list[tuple[Path, Path]] = [] + + loris_file_path = get_loris_bids_file_path(import_env, session, bids_info.data_type, acquisition.nifti_path) + files_to_copy.append((acquisition.nifti_path, loris_file_path)) + + # Check whether the file is already registered in LORIS. + + loris_file = try_get_file_with_path(env.db, loris_file_path) + if loris_file is not None: + import_env.ignored_acquisitions_count += 1 + log(env, f"File '{loris_file_path}' is already registered in LORIS. Skipping.") + return + + # Get information about the file. 
+ + file_type, file_hash, scan_type = group_errors_tuple( + f"Error while checking database information for MRI acquisition '{bids_info.name}'.", + lambda: get_check_bids_imaging_file_type_from_extension(env, acquisition.nifti_path), + lambda: get_check_bids_nifti_file_hash(env, acquisition), + lambda: get_check_bids_nifti_mri_scan_type(env, bids_info), + ) + + # Get the auxiliary files. + + # The auxiliary files to the NIfTI file and its sidecar, with the file type on the left and the + # file path on the right. + aux_file_paths: list[tuple[str, Path]] = [] + + if acquisition.bval_path is not None: + aux_file_paths.append(('bval', acquisition.bval_path)) + + if acquisition.bvec_path is not None: + aux_file_paths.append(('bvec', acquisition.bvec_path)) + + if acquisition.physio_path is not None: + aux_file_paths.append(('physio', acquisition.physio_path)) + + if acquisition.events_path is not None: + aux_file_paths.append(('events', acquisition.events_path)) + + # Get the file parameters. 
+ + file_parameters: dict[str, Any] = {} + + if acquisition.sidecar_file is not None: + add_bids_mri_sidecar_file_parameters(env, acquisition.sidecar_file, file_parameters) + json_loris_path = get_loris_bids_file_path( + import_env, + session, + bids_info.data_type, + acquisition.sidecar_file.path, + ) + + files_to_copy.append((acquisition.sidecar_file.path, json_loris_path)) + file_parameters['bids_json_file'] = json_loris_path + file_parameters['bids_json_file_blake2b_hash'] = compute_file_blake2b_hash(acquisition.sidecar_file.path) + + add_nifti_spatial_file_parameters(acquisition.nifti_path, file_parameters) + file_parameters['file_blake2b_hash'] = file_hash + + if bids_info.scans_file is not None and bids_info.scan_row is not None: + add_bids_scans_file_parameters(bids_info.scans_file, bids_info.scan_row, file_parameters) + + for aux_file_type, aux_file_path in aux_file_paths: + aux_file_hash = compute_file_blake2b_hash(aux_file_path) + aux_file_loris_path = get_loris_bids_file_path(import_env, session, bids_info.data_type, aux_file_path) + files_to_copy.append((aux_file_path, aux_file_loris_path)) + file_parameters[f'bids_{aux_file_type}'] = str(aux_file_loris_path) + file_parameters[f'bids_{aux_file_type}_blake2b_hash'] = aux_file_hash + + # Copy the files on the file system. + for copied_file_path, loris_copied_file_path in files_to_copy: + copy_loris_bids_file(import_env, copied_file_path, loris_copied_file_path) + + # Register the file and its parameters in the database. 
+
+    file = register_mri_file(
+        env,
+        loris_file_path,
+        file_type,
+        session,
+        scan_type,
+        None,
+        None,
+        file_parameters.get('SeriesInstanceUID'),
+        file_parameters.get('EchoTime'),
+        file_parameters.get('EchoNumber'),
+        file_parameters.get('PhaseEncodingDirection'),
+        bids_info.scan_row.get_acquisition_time() if bids_info.scan_row is not None else None,
+        False,
+    )
+
+    register_mri_file_parameters(env, file, file_parameters)
+
+    env.db.commit()
+
+    # Create and register the file picture.
+
+    pic_rel_path = create_nifti_preview_picture(env, file)
+
+    register_mri_file_parameter(env, file, 'check_pic_filename', str(pic_rel_path))
+
+    env.db.commit()
+
+
+def get_check_bids_nifti_file_hash(env: Env, acquisition: MriAcquisition) -> str:
+    """
+    Compute the BLAKE2b hash of a NIfTI file and raise an exception if that hash is already
+    registered in the database.
+    """
+
+    file_hash = compute_file_blake2b_hash(acquisition.nifti_path)
+
+    file = try_get_file_with_hash(env.db, file_hash)
+    if file is not None:
+        raise Exception(f"File with hash '{file_hash}' already present in the database.")
+
+    return file_hash
+
+
+def get_check_bids_nifti_mri_scan_type(env: Env, bids_info: BidsAcquisitionInfo) -> DbMriScanType:
+    """
+    Get the MRI scan type corresponding to a BIDS MRI acquisition using its BIDS suffix. Create the
+    MRI scan type in the database if the suffix is a standard BIDS suffix and the scan type does not
+    already exist in the database, or raise an exception if no known scan type is found.
+ """ + + if bids_info.suffix is None: + raise Exception("No BIDS suffix found in the NIfTI file name, cannot infer the file data type.") + + mri_scan_type = try_get_mri_scan_type_with_name(env.db, bids_info.suffix) + if mri_scan_type is not None: + return mri_scan_type + + if bids_info.suffix not in KNOWN_SUFFIXES_PER_MRI_DATA_TYPE[bids_info.data_type]: + raise Exception(f"Found unknown MRI file suffix '{bids_info.suffix}'.") + + return create_mri_scan_type(env, bids_info.suffix) diff --git a/python/lib/import_bids_dataset/physio.py b/python/lib/import_bids_dataset/physio.py index 56b2e095c..a8ef7858d 100644 --- a/python/lib/import_bids_dataset/physio.py +++ b/python/lib/import_bids_dataset/physio.py @@ -1,6 +1,6 @@ from pathlib import Path -from loris_utils.crypto import compute_file_blake2b_hash +from loris_utils.crypto import compute_directory_blake2b_hash, compute_file_blake2b_hash from lib.db.models.physio_modality import DbPhysioModality from lib.db.models.physio_output_type import DbPhysioOutputType @@ -41,7 +41,10 @@ def get_check_bids_physio_file_hash(env: Env, file_path: Path) -> str: registered in the database. """ - file_hash = compute_file_blake2b_hash(file_path) + if file_path.is_dir(): + file_hash = compute_directory_blake2b_hash(file_path) + else: + file_hash = compute_file_blake2b_hash(file_path) file = try_get_physio_file_with_hash(env.db, file_hash) if file is not None: diff --git a/python/lib/import_bids_dataset/print.py b/python/lib/import_bids_dataset/print.py new file mode 100644 index 000000000..bea3cff27 --- /dev/null +++ b/python/lib/import_bids_dataset/print.py @@ -0,0 +1,60 @@ +from lib.env import Env +from lib.import_bids_dataset.env import BidsImportEnv +from lib.logging import log + + +def print_bids_import_summary(env: Env, import_env: BidsImportEnv): + """ + Print a summary of this BIDS import process. 
+    """
+
+    log(
+        env,
+        (
+            f"Processed {import_env.processed_files_count} acquisitions, including {import_env.imported_files_count}"
+            f" imports, {import_env.ignored_files_count} ignores, and {import_env.failed_files_count}"
+            " errors."
+        ),
+    )
+
+    if import_env.unknown_mri_scan_types != []:
+        import_env.unknown_mri_scan_types.sort()
+
+        unknown_scan_types_string = ""
+        for unknown_scan_type in import_env.unknown_mri_scan_types:
+            unknown_scan_types_string += f"\n- {unknown_scan_type}"
+
+        log(
+            env,
+            f"Found {len(import_env.unknown_mri_scan_types)} unknown MRI scan types:{unknown_scan_types_string}"
+        )
+
+    if import_env.unknown_physio_channel_types != []:
+        import_env.unknown_physio_channel_types.sort()
+
+        unknown_channel_types_string = ""
+        for unknown_channel_type in import_env.unknown_physio_channel_types:
+            unknown_channel_types_string += f"\n- {unknown_channel_type}"
+
+        log(
+            env,
+            (
+                f"Found {len(import_env.unknown_physio_channel_types)} unknown physiological channel types:"
+                f"{unknown_channel_types_string}"
+            ),
+        )
+
+    if import_env.unknown_physio_status_types != []:
+        import_env.unknown_physio_status_types.sort()
+
+        unknown_status_types_string = ""
+        for unknown_status_type in import_env.unknown_physio_status_types:
+            unknown_status_types_string += f"\n- {unknown_status_type}"
+
+        log(
+            env,
+            (
+                f"Found {len(import_env.unknown_physio_status_types)} unknown physiological status types:"
+                f"{unknown_status_types_string}"
+            ),
+        )
diff --git a/python/lib/import_bids_dataset/scans.py b/python/lib/import_bids_dataset/scans.py
new file mode 100644
index 000000000..c7bef44c5
--- /dev/null
+++ b/python/lib/import_bids_dataset/scans.py
@@ -0,0 +1,20 @@
+from typing import Any
+
+from loris_bids_reader.files.scans import BidsScansTsvFile, BidsScanTsvRow
+from loris_utils.crypto import compute_file_blake2b_hash
+
+
+def add_bids_scans_file_parameters(
+    scans_file: BidsScansTsvFile,
+    scan_row: BidsScanTsvRow,
+    file_parameters: dict[str, Any],
+): + """ + Read a BIDS `scans.tsv` file and row, and add its information to the LORIS file parameters + dictionary. + """ + + file_parameters['scan_acquisition_time'] = scan_row.get_acquisition_time() + file_parameters['age_at_scan'] = scan_row.get_age_at_scan() + file_parameters['scans_tsv_file'] = scans_file.path + file_parameters['scans_tsv_file_bake2hash'] = compute_file_blake2b_hash(scans_file.path) diff --git a/python/lib/mri.py b/python/lib/mri.py deleted file mode 100644 index 9017e37d9..000000000 --- a/python/lib/mri.py +++ /dev/null @@ -1,399 +0,0 @@ -"""Deals with MRI BIDS datasets and register them into the database.""" - -import getpass -import os -import re -import sys -from pathlib import Path - -from loris_bids_reader.files.scans import BidsScansTsvFile -from loris_bids_reader.mri.sidecar import BidsMriSidecarJsonFile -from loris_utils.crypto import compute_file_blake2b_hash - -import lib.exitcode -import lib.utilities as utilities -from lib.db.models.session import DbSession -from lib.env import Env -from lib.imaging import Imaging -from lib.import_bids_dataset.copy_files import copy_scans_tsv_file_to_loris_bids_dir -from lib.import_bids_dataset.file_type import get_check_bids_imaging_file_type_from_extension - - -class Mri: - """ - This class reads the BIDS MRI data structure and registers the MRI datasets into the - database by calling lib.imaging class. 
- - :Example: - - from lib.mri import Mri - from lib.database import Database - - # database connection - db = Database(config_file.mysql, verbose) - db.connect() - - # grep config settings from the Config module - config_obj = Config(db, verbose) - default_bids_vl = config_obj.get_config('default_bids_vl') - data_dir = config_obj.get_config('dataDirBasepath') - - # create the LORIS_BIDS directory in data_dir based on Name and BIDS version - loris_bids_root_dir = create_loris_bids_directory( - bids_reader, data_dir, verbose - ) - for row in bids_reader.cand_session_modalities_list: - for modality in row['modalities']: - if modality in ['anat', 'dwi', 'fmap', 'func']: - bids_session = row['bids_ses_id'] - visit_label = bids_session if bids_session else default_bids_vl - loris_bids_mri_rel_dir = "sub-" + row['bids_sub_id'] + "/" + \ - "ses-" + visit_label + "/mri/" - lib.utilities.create_dir( - loris_bids_root_dir + loris_bids_mri_rel_dir, verbose - ) - Mri( - env = env, - bids_layout = bids_layout, - session = session, - bids_sub_id = row['bids_sub_id'], - bids_ses_id = row['bids_ses_id'], - bids_modality = modality, - db = db, - verbose = verbose, - data_dir = data_dir, - default_visit_label = default_bids_vl, - loris_bids_eeg_rel_dir = loris_bids_mri_rel_dir, - loris_bids_root_dir = loris_bids_root_dir - ) - - # disconnect from the database - db.disconnect() - """ - - def __init__(self, env: Env, bids_layout, session: DbSession, bids_sub_id, bids_ses_id, bids_modality, db, - verbose, data_dir, default_visit_label, - loris_bids_mri_rel_dir, loris_bids_root_dir): - - # enumerate the different suffixes supported by BIDS per modality type - self.possible_suffix_per_modality = { - 'anat' : [ - 'T1w', 'T2w', 'T1rho', 'T1map', 'T2map', 'T2star', 'FLAIR', - 'FLASH', 'PD', 'PDmap', 'PDT2', 'inplaneT1', 'inplaneT2', 'angio' - ], - 'func' : [ - 'bold', 'cbv', 'phase' - ], - 'dwi' : [ - 'dwi', 'sbref' - ], - 'fmap' : [ - 'phasediff', 'magnitude1', 'magnitude2', 'phase1', 
'phase2', - 'fieldmap', 'epi' - ] - } - - self.env = env - - # load bids objects - self.bids_layout = bids_layout - - # load the LORIS BIDS import root directory where the files will be copied - self.loris_bids_mri_rel_dir = loris_bids_mri_rel_dir - self.loris_bids_root_dir = loris_bids_root_dir - self.data_dir = data_dir - - # load BIDS subject, visit and modality - self.bids_sub_id = bids_sub_id - self.bids_ses_id = bids_ses_id - self.bids_modality = bids_modality - - # load database handler object and verbose bool - self.db = db - self.verbose = verbose - - # find corresponding CandID and SessionID in LORIS - self.session = session - self.default_vl = default_visit_label - - # grep all the NIfTI files for the modality - self.nifti_files = self.grep_nifti_files() - - # check if a tsv with acquisition dates or age is available for the subject - self.scans_file = None - if self.bids_layout.get(suffix='scans', subject=self.session.candidate.psc_id, return_type='filename'): - scans_file_path = self.bids_layout.get(suffix='scans', subject=self.session.candidate.psc_id, - return_type='filename', extension='tsv')[0] - self.scans_file = BidsScansTsvFile(Path(scans_file_path)) - - # loop through NIfTI files and register them in the DB - for nifti_file in self.nifti_files: - self.register_raw_file(nifti_file) - - def grep_nifti_files(self): - """ - Returns the list of NIfTI files found for the modality. 
- - :return: list of NIfTI files found for the modality - :rtype: list - """ - - # grep all the possible suffixes for the modality - modality_possible_suffix = self.possible_suffix_per_modality[self.bids_modality] - - # loop through the possible suffixes and grep the NIfTI files - nii_files_list = [] - for suffix in modality_possible_suffix: - nii_files_list.extend(self.grep_bids_files(suffix, 'nii.gz')) - - # return the list of found NIfTI files - return nii_files_list - - def grep_bids_files(self, bids_type, extension): - """ - Greps the BIDS files and their layout information from the BIDSLayout - and return that list. - - :param bids_type: the BIDS type to use to grep files (T1w, T2w, bold, dwi...) - :type bids_type: str - :param extension: extension of the file to look for (nii.gz, json...) - :type extension: str - - :return: list of files from the BIDS layout - :rtype: list - """ - - if self.bids_ses_id: - return self.bids_layout.get( - subject = self.bids_sub_id, - session = self.bids_ses_id, - datatype = self.bids_modality, - extension = extension, - suffix = bids_type - ) - else: - return self.bids_layout.get( - subject = self.bids_sub_id, - datatype = self.bids_modality, - extension = extension, - suffix = bids_type - ) - - def register_raw_file(self, nifti_file): - """ - Registers raw MRI files and related files into the files and parameter_file tables. - - :param nifti_file: NIfTI file object - :type nifti_file: pybids NIfTI file object - """ - - # insert the NIfTI file - self.fetch_and_insert_nifti_file(nifti_file) - - def fetch_and_insert_nifti_file(self, nifti_file, derivatives=None): - """ - Gather NIfTI file information to insert into the files and parameter_file tables. - Once all the information has been gathered, it will call imaging.insert_imaging_file - that will perform the insertion into the files and parameter_file tables. 
- - :param nifti_file : NIfTI file object - :type nifti_file : pybids NIfTI file object - :param derivatives: whether the file to be registered is a derivative file - :type derivatives: bool - - :return: dictionary with the inserted file_id and file_path - :rtype: dict - """ - - # load the Imaging object that will be used to insert the imaging data into the database - imaging = Imaging(self.db, self.verbose) - - # load the list of associated files with the NIfTI file - associated_files = nifti_file.get_associations() - - # load the entity information from the NIfTI file - entities = nifti_file.get_entities() - scan_type = entities['suffix'] - - # loop through the associated files to grep JSON, bval, bvec... - sidecar_json = None - other_assoc_files = {} - for assoc_file in associated_files: - file_info = assoc_file.get_entities() - if re.search(r'json$', file_info['extension']): - sidecar_json = BidsMriSidecarJsonFile(Path(assoc_file.path)) - elif re.search(r'bvec$', file_info['extension']): - other_assoc_files['bvec_file'] = assoc_file.path - elif re.search(r'bval$', file_info['extension']): - other_assoc_files['bval_file'] = assoc_file.path - elif re.search(r'tsv$', file_info['extension']) and file_info['suffix'] == 'events': - other_assoc_files['task_file'] = assoc_file.path - elif re.search(r'tsv$', file_info['extension']) and file_info['suffix'] == 'physio': - other_assoc_files['physio_file'] = assoc_file.path - - # read the json file if it exists - file_parameters = {} - if sidecar_json is not None: - file_parameters = imaging.map_bids_param_to_loris_param(sidecar_json.data) - # copy the JSON file to the LORIS BIDS import directory - json_path = self.copy_file_to_loris_bids_dir(sidecar_json.path) - file_parameters['bids_json_file'] = json_path - json_blake2 = compute_file_blake2b_hash(sidecar_json.path) - file_parameters['bids_json_file_blake2b_hash'] = json_blake2 - - # grep the file type from the ImagingFileTypes table - file_type = 
get_check_bids_imaging_file_type_from_extension(self.env, Path(nifti_file.filename)) - - # determine the output type - output_type = 'derivatives' if derivatives else 'native' - if not derivatives: - coordinate_space = 'native' - - # get the acquisition date of the MRI or the age at the time of acquisition - if self.scans_file is not None: - scan_info = self.scans_file.get_row(Path(nifti_file.path)) - if scan_info is not None: - try: - file_parameters['scan_acquisition_time'] = scan_info.get_acquisition_time() - file_parameters['age_at_scan'] = scan_info.get_age_at_scan() - except Exception as error: - print(f"ERROR: {error}") - sys.exit(lib.exitcode.PROGRAM_EXECUTION_FAILURE) - - # copy the scans.tsv file to the LORIS BIDS import directory - scans_path = copy_scans_tsv_file_to_loris_bids_dir( - self.scans_file, - self.bids_sub_id, - self.loris_bids_root_dir, - self.data_dir, - ) - - file_parameters['scans_tsv_file'] = scans_path - scans_blake2 = compute_file_blake2b_hash(self.scans_file.path) - file_parameters['scans_tsv_file_bake2hash'] = scans_blake2 - - # grep voxel step from the NIfTI file header - step_parameters = imaging.get_nifti_image_step_parameters(nifti_file.path) - file_parameters['xstep'] = step_parameters[0] - file_parameters['ystep'] = step_parameters[1] - file_parameters['zstep'] = step_parameters[2] - - # grep the time length from the NIfTI file header - is_4d_dataset = False - length_parameters = imaging.get_nifti_image_length_parameters(nifti_file.path) - if len(length_parameters) == 4: - file_parameters['time'] = length_parameters[3] - is_4d_dataset = True - - # add all other associated files to the file_parameters so they get inserted - # in parameter_file - for type in other_assoc_files: - original_file_path = other_assoc_files[type] - copied_path = self.copy_file_to_loris_bids_dir(original_file_path) - file_param_name = 'bids_' + type - file_parameters[file_param_name] = copied_path - file_blake2 = 
compute_file_blake2b_hash(original_file_path) - hash_param_name = file_param_name + '_blake2b_hash' - file_parameters[hash_param_name] = file_blake2 - - # append the blake2b to the MRI file parameters dictionary - blake2 = compute_file_blake2b_hash(nifti_file.path) - file_parameters['file_blake2b_hash'] = blake2 - - # check that the file is not already inserted before inserting it - result = imaging.grep_file_info_from_hash(blake2) - file_id = result['FileID'] if result else None - file_path = result['File'] if result else None - if not file_id: - # grep the scan type ID from the mri_scan_type table (if it is not already in - # the table, it will add a row to the mri_scan_type table) - scan_type_id = self.db.grep_id_from_lookup_table( - id_field_name = 'MriScanTypeID', - table_name = 'mri_scan_type', - where_field_name = 'MriScanTypeName', - where_value = scan_type, - insert_if_not_found = True - ) - - # copy the NIfTI file to the LORIS BIDS import directory - file_path = self.copy_file_to_loris_bids_dir(nifti_file.path) - - # insert the file along with its information into files and parameter_file tables - echo_time = file_parameters['EchoTime'] if 'EchoTime' in file_parameters.keys() else None - echo_nb = file_parameters['EchoNumber'] if 'EchoNumber' in file_parameters.keys() else None - phase_enc_dir = file_parameters['PhaseEncodingDirection'] \ - if 'PhaseEncodingDirection' in file_parameters.keys() else None - file_info = { - 'FileType' : file_type.name, - 'File' : file_path, - 'SessionID' : self.session.id, - 'InsertedByUserID': getpass.getuser(), - 'CoordinateSpace' : coordinate_space, - 'OutputType' : output_type, - 'EchoTime' : echo_time, - 'PhaseEncodingDirection': phase_enc_dir, - 'EchoNumber' : echo_nb, - 'SourceFileID' : None, - 'MriScanTypeID' : scan_type_id - } - file_id = imaging.insert_imaging_file(file_info, file_parameters) - - # create the pic associated with the file - pic_rel_path = imaging.create_imaging_pic( - { - 'cand_id' : 
self.session.candidate.cand_id, - 'data_dir_path': self.data_dir, - 'file_rel_path': file_path, - 'is_4D_dataset': is_4d_dataset, - 'file_id' : file_id - } - ) - if os.path.exists(os.path.join(self.data_dir, 'pic/', pic_rel_path)): - imaging.insert_parameter_file(file_id, 'check_pic_filename', pic_rel_path) - - return {'file_id': file_id, 'file_path': file_path} - - def copy_file_to_loris_bids_dir(self, file, derivatives_path=None): - """ - Wrapper around the utilities.copy_file function that copies the file - to the LORIS BIDS import directory and returns the relative path of the - file (without the data_dir part). - - :param file: full path to the original file - :type file: str - :param derivatives_path: path to the derivative folder - :type derivatives_path: str - - :return: relative path to the copied file - :rtype: str - """ - - # determine the path of the copied file - copy_file = self.loris_bids_mri_rel_dir - if self.bids_ses_id: - copy_file += os.path.basename(file) - else: - # make sure the ses- is included in the new filename if using - # default visit label from the LORIS config - copy_file += str.replace( - os.path.basename(file), - "sub-" + self.bids_sub_id, - "sub-" + self.bids_sub_id + "_ses-" + self.default_vl - ) - if derivatives_path: - # create derivative subject/vl/modality directory - lib.utilities.create_dir( - derivatives_path + self.loris_bids_mri_rel_dir, - self.verbose - ) - copy_file = derivatives_path + copy_file - else: - copy_file = self.loris_bids_root_dir + copy_file - - # copy the file - utilities.copy_file(file, copy_file, self.verbose) - - # determine the relative path and return it - relative_path = copy_file.replace(self.data_dir, "") - - return relative_path diff --git a/python/lib/physio/channels.py b/python/lib/physio/channels.py new file mode 100644 index 000000000..7c4544698 --- /dev/null +++ b/python/lib/physio/channels.py @@ -0,0 +1,52 @@ +from datetime import datetime +from decimal import Decimal +from pathlib import 
Path + +from lib.db.models.physio_channel import DbPhysioChannel +from lib.db.models.physio_channel_type import DbPhysioChannelType +from lib.db.models.physio_file import DbPhysioFile +from lib.db.models.physio_status_type import DbPhysioStatusType +from lib.env import Env + + +def insert_physio_channel( + env: Env, + physio_file: DbPhysioFile, + channel_type: DbPhysioChannelType, + status_type: DbPhysioStatusType | None, + file_path: Path, + name: str, + description: str | None, + sampling_frequency: int | None, + low_cutoff: Decimal | None, + high_cutoff: Decimal | None, + notch: int | None, + status_description: str | None, + unit: str | None, +) -> DbPhysioChannel: + """ + Insert a physiological channel into the database. + """ + + event_file = DbPhysioChannel( + physio_file_id = physio_file.id, + file_path = file_path, + channel_type_id = channel_type.id, + status_type_id = status_type.id if status_type is not None else None, + insert_time = datetime.now(), + name = name, + description = description, + sampling_frequency = sampling_frequency, + low_cutoff = low_cutoff, + high_cutoff = high_cutoff, + manual_flag = None, # TODO + notch = notch, # TODO + reference = None, # TODO + status_description = status_description, + unit = unit, + ) + + env.db.add(event_file) + env.db.flush() + + return event_file diff --git a/python/lib/physio/chunking.py b/python/lib/physio/chunking.py index 6cd34dc1c..f72b78580 100644 --- a/python/lib/physio/chunking.py +++ b/python/lib/physio/chunking.py @@ -3,7 +3,7 @@ from loris_utils.path import get_path_stem import lib.exitcode -from lib.config import get_data_dir_path_config, get_eeg_chunks_dir_path_config +from lib.config import get_data_dir_path_config, get_ephys_chunks_dir_path_config from lib.db.models.physio_file import DbPhysioFile from lib.db.queries.physio_parameter import try_get_physio_file_parameter_with_file_id_name from lib.env import Env @@ -89,7 +89,7 @@ def get_dataset_chunks_dir_path(env: Env, physio_file: 
DbPhysioFile): # The first part of the physiological file path is assumed to be the BIDS imports directory # name. The second part of the physiological file path is assumed to be the dataset name. - eeg_chunks_dir_path = get_eeg_chunks_dir_path_config(env) + eeg_chunks_dir_path = get_ephys_chunks_dir_path_config(env) if eeg_chunks_dir_path is None: data_dir_path = get_data_dir_path_config(env) eeg_chunks_dir_path = data_dir_path / physio_file.path.parts[0] diff --git a/python/lib/physio/events.py b/python/lib/physio/events.py new file mode 100644 index 000000000..ddc92a81f --- /dev/null +++ b/python/lib/physio/events.py @@ -0,0 +1,103 @@ +from dataclasses import dataclass +from datetime import datetime, time +from decimal import Decimal +from pathlib import Path + +from lib.db.models.physio_event_file import DbPhysioEventFile +from lib.db.models.physio_file import DbPhysioFile +from lib.db.models.physio_task_event import DbPhysioTaskEvent +from lib.env import Env + + +@dataclass +class DatasetSource: + project_id: int + + @property + def physio_file(self) -> None: + return None + + @property + def physio_file_id(self) -> None: + return None + + @property + def project_wide(self) -> bool: + return True + + +@dataclass +class FileSource: + physio_file: DbPhysioFile + + @property + def project_id(self) -> int: + return self.physio_file.session.project.id + + @property + def physio_file_id(self) -> int: + return self.physio_file.id + + @property + def project_wide(self) -> bool: + return False + + +EventFileSource = DatasetSource | FileSource + + +def insert_physio_events_dictionary_file(env: Env): + pass + + +def insert_physio_events_file(env: Env, physio_file: DbPhysioFile, path: Path) -> DbPhysioEventFile: + """ + Insert a physiological events file into the database. 
+ """ + + event_file = DbPhysioEventFile( + physio_file_id = physio_file.id, + project_id = physio_file.session.project.id, + file_type = 'tsv', + file_path = path, + ) + + env.db.add(event_file) + env.db.flush() + + return event_file + + +def insert_physio_event_task( + env: Env, + physio_file: DbPhysioFile, + events_file: DbPhysioEventFile, + onset: Decimal, + duration: Decimal, + trial_type: str | None, + response_time: time | None, +) -> DbPhysioTaskEvent: + """ + Insert a physiological event task in the database. + """ + + event_task_file = DbPhysioTaskEvent( + physio_file_id = physio_file.id, + event_file_id = events_file.id, + insert_time = datetime.now(), + onset = onset, + duration = duration, + event_code = 0, # row.event_code TODO: This seems to be a non-standard field. + event_value = '', # row.trial_type TODO: This seems to be a non-standard field. + event_sample = Decimal(0), # row.event_sample TODO: This seems to be a non-standard field. + event_type = '', # row.event_type TODO: This seems to be a non-standard field. + trial_type = trial_type, + response_time = response_time, + ) + + # TODO: Handle HED. 
+ + env.db.add(event_task_file) + env.db.flush() + + return event_task_file diff --git a/python/lib/physio/file.py b/python/lib/physio/file.py index e1891da3f..a1969fb48 100644 --- a/python/lib/physio/file.py +++ b/python/lib/physio/file.py @@ -3,6 +3,7 @@ from pathlib import Path from lib.db.models.imaging_file_type import DbImagingFileType +from lib.db.models.meg_ctf_head_shape_file import DbMegCtfHeadShapeFile from lib.db.models.physio_file import DbPhysioFile from lib.db.models.physio_modality import DbPhysioModality from lib.db.models.physio_output_type import DbPhysioOutputType @@ -18,19 +19,27 @@ def insert_physio_file( modality: DbPhysioModality, output_type: DbPhysioOutputType, acquisition_time: datetime | None, + download_path: Path | None = None, + head_shape_file: DbMegCtfHeadShapeFile | None = None, ) -> DbPhysioFile: """ Insert a physiological file into the database. """ + # If the download path is not provided, use the normal file path. + if download_path is None: + download_path = file_path + file = DbPhysioFile( path = file_path, + download_path = download_path, type = file_type.name, session_id = session.id, modality_id = modality.id, output_type_id = output_type.id, acquisition_time = acquisition_time, inserted_by_user = getpass.getuser(), + head_shape_file_id = head_shape_file.id if head_shape_file is not None else None, ) env.db.add(file) diff --git a/python/lib/physio/hed.py b/python/lib/physio/hed.py new file mode 100644 index 000000000..b480f8852 --- /dev/null +++ b/python/lib/physio/hed.py @@ -0,0 +1,84 @@ +from dataclasses import dataclass +from typing import Any + +from lib.db.models.bids_event_dataset_mapping import DbBidsEventDatasetMapping +from lib.db.models.bids_event_file_mapping import DbBidsEventFileMapping +from lib.db.queries.hed_schema_node import get_all_hed_schema_nodes +from lib.env import Env +from lib.physio.events import DatasetSource, EventFileSource, FileSource + + +@dataclass +class TagGroupMember: + hed_tag_id: int | 
None
+    has_pairing: bool
+    additional_members: int
+    tag_value: str | None = None
+
+    def __eq__(self, other: object):
+        if not isinstance(other, TagGroupMember):
+            return False
+
+        return self.hed_tag_id == other.hed_tag_id and \
+            self.has_pairing == other.has_pairing and \
+            self.additional_members == other.additional_members
+
+
+def build_hed_tag_groups(env: Env, hed_string: str) -> list[TagGroupMember]:
+    """
+    Assemble the physiological event HED tags.
+    """
+
+    from lib.physiological import Physiological
+
+    hed_schema_nodes = get_all_hed_schema_nodes(env.db)
+    hed_union: list[dict[str, Any]] = list(map(lambda hed_schema_node: {
+        'ID': hed_schema_node.id,
+        'Name': hed_schema_node.name,
+    }, hed_schema_nodes))
+
+    return Physiological.build_hed_tag_groups(hed_union, hed_string)  # type: ignore
+
+
+def insert_hed_tag_group(
+    env: Env,
+    source: EventFileSource,
+    hed_tag_group: list[TagGroupMember],
+    property_name: str | None,
+    property_value: str | None,
+    level_description: str | None,
+):
+    """
+    Insert some HED tag groups into the database.
+ """ + + for hed_tag in hed_tag_group: + match source: + case DatasetSource(): + mapping = DbBidsEventDatasetMapping( + target_id=source.project_id, + property_name=property_name, + property_value=property_value, + hed_tag_id=hed_tag.hed_tag_id, + tag_value=hed_tag.tag_value, + has_pairing=hed_tag.has_pairing, + description=level_description, + pair_rel_id=None, + additional_members=hed_tag.additional_members, + ) + case FileSource(): + mapping = DbBidsEventFileMapping( + target_id=source.project_id, + property_name=property_name, + property_value=property_value, + hed_tag_id=hed_tag.hed_tag_id, + tag_value=hed_tag.tag_value, + has_pairing=hed_tag.has_pairing, + description=level_description, + pair_rel_id=None, + additional_members=hed_tag.additional_members, + ) + + env.db.add(mapping) + + env.db.flush() diff --git a/python/lib/physiological.py b/python/lib/physiological.py index 6b8d8022c..9e7d3d6e5 100644 --- a/python/lib/physiological.py +++ b/python/lib/physiological.py @@ -1,6 +1,5 @@ """This class performs database queries for BIDS physiological dataset (EEG, MEG...)""" -from dataclasses import dataclass from functools import reduce from loris_bids_reader.eeg.channels import BidsEegChannelsTsvFile @@ -17,6 +16,7 @@ from lib.db.queries.physio_channel import try_get_channel_type_with_name, try_get_status_type_with_name from lib.env import Env from lib.logging import log_error_exit +from lib.physio.hed import TagGroupMember from lib.physio.parameters import insert_physio_file_parameter, insert_physio_project_parameter from lib.point_3d import Point3D @@ -481,18 +481,6 @@ def get_additional_members_from_parenthesis_index(string_split, parentheses_to_f return additional_members return 0 - @dataclass - class TagGroupMember: - hed_tag_id: int | None - has_pairing: bool - additional_members: int - tag_value: str | None = None - - def __eq__(self, other): - return self.hed_tag_id == other.hed_tag_id and \ - self.has_pairing == other.has_pairing and \ - 
self.additional_members == other.additional_members - @staticmethod def build_hed_tag_groups(hed_union, hed_string): """ @@ -540,7 +528,7 @@ def build_hed_tag_groups(hed_union, hed_string): Physiological.get_additional_members_from_parenthesis_index(string_split, 1, element_index) hed_tag_id = Physiological.get_hed_tag_id_from_name(left_stripped, hed_union) - tag_group.append(Physiological.TagGroupMember(hed_tag_id, has_pairing, additional_members)) + tag_group.append(TagGroupMember(hed_tag_id, has_pairing, additional_members)) for i in range( 0 if group_depth > 0 and element.startswith('(') and element.endswith(')') else 1, @@ -549,7 +537,7 @@ def build_hed_tag_groups(hed_union, hed_string): has_pairing = True additional_members = \ Physiological.get_additional_members_from_parenthesis_index(string_split, i + 1, element_index) - tag_group.append(Physiological.TagGroupMember(None, has_pairing, additional_members)) + tag_group.append(TagGroupMember(None, has_pairing, additional_members)) group_depth += (len(element) - len(right_stripped)) group_depth -= num_opening_parentheses if len(tag_group) > 0: @@ -774,25 +762,3 @@ def insert_event_file(self, events_file: BidsEventsTsvFile, event_file, physiolo ) # insert blake2b hash of task event file into physiological_parameter_file insert_physio_file_parameter(self.env, physiological_file, 'event_file_blake2b_hash', blake2) - - def insert_archive_file(self, archive_info): - """ - Inserts the archive file of all physiological files (including - electrodes.tsv, channels.tsv and events.tsv) in the - physiological_archive table of the database. 
- - :param archive_info: dictionary with key/value pairs to insert - :type archive_info: dict - """ - - # insert the archive into the physiological_archive table - archive_fields = () - archive_values = () - for key, value in archive_info.items(): - archive_fields = (*archive_fields, key) - archive_values = (*archive_values, value) - self.db.insert( - table_name = 'physiological_archive', - column_names = archive_fields, - values = archive_values - ) diff --git a/python/lib/utilities.py b/python/lib/utilities.py index 837048474..43ce61273 100644 --- a/python/lib/utilities.py +++ b/python/lib/utilities.py @@ -137,6 +137,7 @@ def create_dir(dir_name, verbose): return dir_name +@deprecated('Use `loris_utils.archive.create_archive_with_files` instead') def create_archive(files_to_archive, archive_path): """ Creates an archive with the files listed in the files_to_archive tuple. diff --git a/python/loris_bids_reader/src/loris_bids_reader/eeg/channels.py b/python/loris_bids_reader/src/loris_bids_reader/eeg/channels.py index 6b041737c..ad8f156ac 100644 --- a/python/loris_bids_reader/src/loris_bids_reader/eeg/channels.py +++ b/python/loris_bids_reader/src/loris_bids_reader/eeg/channels.py @@ -32,6 +32,12 @@ def __init__(self, data: dict[str, Any]): # physiological_channel table (a.k.a. 
99999.999) data['high_cutoff'] = 99999.999 + if data['high_cutoff'] == 'n/a': + data['high_cutoff'] = None + + if data['low_cutoff'] == 'n/a': + data['low_cutoff'] = None + + if re.match(r"n.?a", str(data['notch']), re.IGNORECASE): # replace n/a, N/A, na, NA by None which will translate to NULL # in the physiological_channel table diff --git a/python/loris_bids_reader/src/loris_bids_reader/eeg/reader.py b/python/loris_bids_reader/src/loris_bids_reader/eeg/reader.py new file mode 100644 index 000000000..f4f6d97ea --- /dev/null +++ b/python/loris_bids_reader/src/loris_bids_reader/eeg/reader.py @@ -0,0 +1,11 @@ +from collections.abc import Sequence +from functools import cached_property + +from loris_bids_reader.info import BidsAcquisitionInfo +from loris_bids_reader.reader import BidsDataTypeReader + + +class BidsEegDataTypeReader(BidsDataTypeReader): + @cached_property + def acquisitions(self) -> Sequence[BidsAcquisitionInfo]: + return [] diff --git a/python/loris_bids_reader/src/loris_bids_reader/files/participants.py b/python/loris_bids_reader/src/loris_bids_reader/files/participants.py index 6d10732ea..72301c05d 100644 --- a/python/loris_bids_reader/src/loris_bids_reader/files/participants.py +++ b/python/loris_bids_reader/src/loris_bids_reader/files/participants.py @@ -3,7 +3,7 @@ import dateutil.parser from dateutil.parser import ParserError -from loris_utils.iter import find +from loris_utils.iter import find, replace_or_append from loris_bids_reader.tsv import BidsTsvFile, BidsTsvRow @@ -67,3 +67,19 @@ def get_row(self, participant_id: str) -> BidsParticipantTsvRow | None: """ return find(self.rows, lambda row: row.participant_id == participant_id) + + def set_row(self, participant: BidsParticipantTsvRow): + """ + Set the row corresponding to the given participant ID, replacing any existing row. 
+ """ + + replace_or_append(self.rows, participant, lambda row: row.participant_id == participant.participant_id) + + def merge(self, other: 'BidsParticipantsTsvFile'): + """ + Copy another `participants.tsv` file into this file. The rows of this file are replaced by + those of the other file if there are duplicates. + """ + + for other_row in other.rows: + self.set_row(other_row) diff --git a/python/loris_bids_reader/src/loris_bids_reader/files/scans.py b/python/loris_bids_reader/src/loris_bids_reader/files/scans.py index 9e44a73f7..6468b35c6 100644 --- a/python/loris_bids_reader/src/loris_bids_reader/files/scans.py +++ b/python/loris_bids_reader/src/loris_bids_reader/files/scans.py @@ -2,7 +2,7 @@ from pathlib import Path import dateutil.parser -from loris_utils.iter import find +from loris_utils.iter import find, replace_or_append from loris_bids_reader.tsv import BidsTsvFile, BidsTsvRow @@ -60,8 +60,16 @@ def __init__(self, path: Path): super().__init__(BidsScanTsvRow, path) def get_row(self, file_path: Path) -> BidsScanTsvRow | None: + return find(self.rows, lambda row: file_path.name in row.data['filename']) + + def set_row(self, scan: BidsScanTsvRow): + replace_or_append(self.rows, scan, lambda row: row.data['filename'] == scan.data['filename']) + + def merge(self, other: 'BidsScansTsvFile'): """ - Get the row corresponding to the given file path. + Copy another `scans.tsv` file into this file. The rows of this file are replaced by + those of the other file if there are duplicates. 
""" - return find(self.rows, lambda row: file_path.name in row.data['filename']) + for other_row in other.rows: + self.set_row(other_row) diff --git a/python/loris_bids_reader/src/loris_bids_reader/info.py b/python/loris_bids_reader/src/loris_bids_reader/info.py index c84a4a369..e106ce6aa 100644 --- a/python/loris_bids_reader/src/loris_bids_reader/info.py +++ b/python/loris_bids_reader/src/loris_bids_reader/info.py @@ -1,6 +1,7 @@ from dataclasses import dataclass from loris_bids_reader.files.participants import BidsParticipantTsvRow +from loris_bids_reader.files.scans import BidsScansTsvFile, BidsScanTsvRow @dataclass @@ -31,6 +32,11 @@ class BidsSessionInfo(BidsSubjectInfo): The BIDS session label. """ + scans_file: BidsScansTsvFile | None + """ + The BIDS `scans.tsv` file of this session, if any. + """ + @dataclass class BidsDataTypeInfo(BidsSessionInfo): @@ -42,3 +48,25 @@ class BidsDataTypeInfo(BidsSessionInfo): """ The BIDS data type name. """ + + +@dataclass +class BidsAcquisitionInfo(BidsDataTypeInfo): + """ + Information about a BIDS acquisition. + """ + + name: str + """ + The name of this acquisition (usually the file name without the extension). + """ + + suffix: str | None + """ + The BIDS suffix of this acquisition, if any. + """ + + scan_row: BidsScanTsvRow | None + """ + The BIDS `scans.tsv` row of this acquisition, if any. 
+ """ diff --git a/python/loris_bids_reader/src/loris_bids_reader/meg/acquisition.py b/python/loris_bids_reader/src/loris_bids_reader/meg/acquisition.py new file mode 100644 index 000000000..81799a0a7 --- /dev/null +++ b/python/loris_bids_reader/src/loris_bids_reader/meg/acquisition.py @@ -0,0 +1,37 @@ + +import re +from pathlib import Path + +from loris_utils.path import remove_path_extension + +from loris_bids_reader.eeg.channels import BidsEegChannelsTsvFile +from loris_bids_reader.files.events import BidsEventsTsvFile +from loris_bids_reader.meg.head_shape import MegCtfHeadShapeFile +from loris_bids_reader.meg.sidecar import BidsMegSidecarJsonFile + + +class MegAcquisition: + ctf_path: Path + sidecar_file: BidsMegSidecarJsonFile + channels_file: BidsEegChannelsTsvFile | None + events_file: BidsEventsTsvFile | None + head_shape_file: MegCtfHeadShapeFile | None + + def __init__(self, ctf_path: Path, head_shape_file: MegCtfHeadShapeFile | None): + self.ctf_path = ctf_path + + path = remove_path_extension(ctf_path) + + sidecar_path = path.with_suffix('.json') + if not sidecar_path.exists(): + raise Exception("No MEG JSON sidecar file.") + + self.sidecar_file = BidsMegSidecarJsonFile(sidecar_path) + + channels_path = path.parent / re.sub(r'_meg$', '_channels.tsv', path.name) + self.channels_file = BidsEegChannelsTsvFile(channels_path) if channels_path.exists() else None + + events_path = path.parent / re.sub(r'_meg$', '_events.tsv', path.name) + self.events_file = BidsEventsTsvFile(events_path) if events_path.exists() else None + + self.head_shape_file = head_shape_file diff --git a/python/loris_bids_reader/src/loris_bids_reader/meg/head_shape.py b/python/loris_bids_reader/src/loris_bids_reader/meg/head_shape.py new file mode 100644 index 000000000..9dfaaa487 --- /dev/null +++ b/python/loris_bids_reader/src/loris_bids_reader/meg/head_shape.py @@ -0,0 +1,42 @@ +from dataclasses import dataclass +from decimal import Decimal +from pathlib import Path + + +@dataclass 
+class MegCtfHeadShapePoint: + """ + A point in a MEG CTF `headshape.pos` file. + """ + + x: Decimal + y: Decimal + z: Decimal + + +class MegCtfHeadShapeFile: + """ + A MEG CTF `headshape.pos` file. + """ + + path: Path + """ + The path of this head shape file. + """ + + points: dict[str, MegCtfHeadShapePoint] + """ + The points of this head shape file. + """ + + def __init__(self, path: Path): + with path.open() as file: + lines = file.readlines() + + points: dict[str, MegCtfHeadShapePoint] = {} + for line in lines[1:]: + parts = line.split() + points[parts[0]] = MegCtfHeadShapePoint(Decimal(parts[1]), Decimal(parts[2]), Decimal(parts[3])) + + self.path = path + self.points = points diff --git a/python/loris_bids_reader/src/loris_bids_reader/meg/reader.py b/python/loris_bids_reader/src/loris_bids_reader/meg/reader.py new file mode 100644 index 000000000..6f7da6aee --- /dev/null +++ b/python/loris_bids_reader/src/loris_bids_reader/meg/reader.py @@ -0,0 +1,75 @@ +import re +from collections.abc import Iterator +from dataclasses import dataclass +from functools import cached_property +from pathlib import Path + +from loris_bids_reader.info import BidsAcquisitionInfo +from loris_bids_reader.meg.acquisition import MegAcquisition +from loris_bids_reader.meg.head_shape import MegCtfHeadShapeFile +from loris_bids_reader.reader import BidsDataTypeReader +from loris_bids_reader.utils import get_pybids_file_path, try_get_pybids_value + + +@dataclass +class BidsMegDataTypeReader(BidsDataTypeReader): + path: Path + + @cached_property + def acquisitions(self) -> list[tuple[MegAcquisition, BidsAcquisitionInfo]]: + """ + The MEG acquisitions found in the MEG data type. 
+ """ + + acquisitions: list[tuple[MegAcquisition, BidsAcquisitionInfo]] = [] + for ctf_name in find_dir_meg_acquisition_names(self.path): + scan_row = self.session.scans_file.get_row(self.path / ctf_name) \ + if self.session.scans_file is not None else None + + acquisition = MegAcquisition(self.path / ctf_name, self.head_shape_file) + + info = BidsAcquisitionInfo( + subject = self.session.subject.label, + participant_row = self.session.subject.participant_row, + session = self.session.label, + scans_file = self.session.scans_file, + data_type = self.name, + scan_row = scan_row, + name = ctf_name, + suffix = 'meg', + ) + + acquisitions.append((acquisition, info)) + + return acquisitions + + @cached_property + def head_shape_file(self) -> MegCtfHeadShapeFile | None: + """ + The MEG CTF head shape (`headshape.pos`) file of this data type, if it exists. + """ + + head_shape_file = try_get_pybids_value( + self.session.subject.dataset.layout, + subject=self.session.subject.label, + session=self.session.label, + datatype=self.name, + suffix='headshape', + extension='.pos', + ) + + if head_shape_file is None: + return None + + return MegCtfHeadShapeFile(get_pybids_file_path(head_shape_file)) + + +def find_dir_meg_acquisition_names(dir_path: Path) -> Iterator[str]: + """ + Iterate over the names of the MEG CTF acquisition directories (`*_meg.ds`) found in a directory. + """ + + for item_path in dir_path.iterdir(): + name_match = re.search(r'.+_meg\.ds$', item_path.name) + if name_match is not None: + yield name_match.group(0) diff --git a/python/loris_bids_reader/src/loris_bids_reader/meg/sidecar.py b/python/loris_bids_reader/src/loris_bids_reader/meg/sidecar.py new file mode 100644 index 000000000..3bdd9270f --- /dev/null +++ b/python/loris_bids_reader/src/loris_bids_reader/meg/sidecar.py @@ -0,0 +1,12 @@ +from loris_bids_reader.json import BidsJsonFile + + +class BidsMegSidecarJsonFile(BidsJsonFile): + """ + Class representing a BIDS MEG sidecar JSON file. 
+ + Documentation: + - https://bids-specification.readthedocs.io/en/stable/modality-specific-files/magnetoencephalography.html#sidecar-json-_megjson + """ + + pass diff --git a/python/loris_bids_reader/src/loris_bids_reader/mri/acquisition.py b/python/loris_bids_reader/src/loris_bids_reader/mri/acquisition.py new file mode 100644 index 000000000..a9c071397 --- /dev/null +++ b/python/loris_bids_reader/src/loris_bids_reader/mri/acquisition.py @@ -0,0 +1,41 @@ +from dataclasses import dataclass +from pathlib import Path + +from loris_bids_reader.mri.sidecar import BidsMriSidecarJsonFile + + +@dataclass +class MriAcquisition: + """ + An MRI acquisition and its related files. + """ + + nifti_path: Path + """ + The main NIfTI file path. + """ + + sidecar_file: BidsMriSidecarJsonFile | None + """ + The related JSON sidecar file path, if it exists. + """ + + bval_path: Path | None + """ + The related bval file path, if it exists. + """ + + bvec_path: Path | None + """ + The related bvec file path, if it exists. + """ + + physio_path: Path | None + """ + The related physio file path, if it exists. + """ + + events_path: Path | None + """ + The related events file path, if it exists. 
+ """ diff --git a/python/loris_bids_reader/src/loris_bids_reader/mri/reader.py b/python/loris_bids_reader/src/loris_bids_reader/mri/reader.py new file mode 100644 index 000000000..0cef6077e --- /dev/null +++ b/python/loris_bids_reader/src/loris_bids_reader/mri/reader.py @@ -0,0 +1,81 @@ + +from dataclasses import dataclass +from functools import cached_property +from pathlib import Path + +from bids.layout import BIDSFile +from loris_utils.path import remove_path_extension + +from loris_bids_reader.info import BidsAcquisitionInfo +from loris_bids_reader.mri.acquisition import MriAcquisition +from loris_bids_reader.mri.sidecar import BidsMriSidecarJsonFile +from loris_bids_reader.reader import BidsDataTypeReader +from loris_bids_reader.utils import find_pybids_file_path, get_pybids_file_path + + +@dataclass +class BidsMriDataTypeReader(BidsDataTypeReader): + @cached_property + def acquisitions(self) -> list[tuple[MriAcquisition, BidsAcquisitionInfo]]: + pybids_layout = self.session.subject.dataset.layout + pybids_files: list[BIDSFile] = pybids_layout.get( # type: ignore + subject = self.session.subject.label, + session = self.session.label, + datatype = self.name, + extension = ['.nii', '.nii.gz'], + ) + + acquisitions: list[tuple[MriAcquisition, BidsAcquisitionInfo]] = [] + for pybids_file in pybids_files: + nifti_path = get_pybids_file_path(pybids_file) + + # Get all associated files + associations: list[BIDSFile] = pybids_file.get_associations() # type: ignore + + # Find associated files using predicates + sidecar_path = find_pybids_file_path(associations, lambda file: file.entities.get('extension') == '.json') + + pybids_bval_path = pybids_layout.get_nearest(pybids_file, extension='.bval') # type: ignore + bval_path = Path(pybids_bval_path) if pybids_bval_path is not None else None # type: ignore + + pybids_bvec_path = pybids_layout.get_nearest(pybids_file, extension='.bvec') # type: ignore + bvec_path = Path(pybids_bvec_path) if pybids_bvec_path is not None 
else None # type: ignore + + events_path = find_pybids_file_path( + associations, + lambda file: file.entities.get('suffix') == 'events' and file.entities.get('extension') == '.tsv', + ) + + physio_path = find_pybids_file_path( + associations, + lambda file: file.entities.get('suffix') in ['physio', 'stim'] + and file.entities.get('extension') in ['.tsv.gz', '.tsv'], + ) + + sidecar_file = BidsMriSidecarJsonFile(sidecar_path) if sidecar_path is not None else None + scan_row = self.session.scans_file.get_row(nifti_path) if self.session.scans_file is not None else None + acquisition_name = remove_path_extension(nifti_path).name + + bids_info = BidsAcquisitionInfo( + subject = self.session.subject.label, + participant_row = self.session.subject.participant_row, + session = self.session.label, + scans_file = self.session.scans_file, + data_type = self.name, + scan_row = scan_row, + name = acquisition_name, + suffix = pybids_file.entities.get('suffix'), + ) + + acquisition = MriAcquisition( + nifti_path = nifti_path, + sidecar_file = sidecar_file, + bval_path = bval_path, + bvec_path = bvec_path, + physio_path = physio_path, + events_path = events_path, + ) + + acquisitions.append((acquisition, bids_info)) + + return acquisitions diff --git a/python/loris_bids_reader/src/loris_bids_reader/reader.py b/python/loris_bids_reader/src/loris_bids_reader/reader.py index eeb15b402..0cc47423f 100644 --- a/python/loris_bids_reader/src/loris_bids_reader/reader.py +++ b/python/loris_bids_reader/src/loris_bids_reader/reader.py @@ -1,13 +1,22 @@ import re +from collections.abc import Sequence from dataclasses import dataclass from functools import cached_property from pathlib import Path +from typing import TYPE_CHECKING from bids import BIDSLayout, BIDSLayoutIndexer from loris_bids_reader.files.dataset_description import BidsDatasetDescriptionJsonFile from loris_bids_reader.files.participants import BidsParticipantsTsvFile, BidsParticipantTsvRow +from loris_bids_reader.files.scans 
import BidsScansTsvFile from loris_bids_reader.info import BidsDataTypeInfo, BidsSessionInfo, BidsSubjectInfo +from loris_bids_reader.json import BidsJsonFile + +# Circular imports +if TYPE_CHECKING: + from loris_bids_reader.meg.reader import BidsMegDataTypeReader + from loris_bids_reader.mri.reader import BidsMriDataTypeReader PYBIDS_IGNORE = ['.git', 'code/', 'log/', 'sourcedata/'] PYBIDS_FORCE_INDEX = [re.compile(r"_annotations\.(tsv|json)$")] @@ -66,6 +75,18 @@ def participants_file(self) -> BidsParticipantsTsvFile | None: return BidsParticipantsTsvFile(participants_path) + @cached_property + def events_dict_file(self) -> BidsJsonFile | None: + """ + The root events dictionary file of this BIDS dataset, if it exists. + """ + + events_dict_path = self.path / 'events.json' + if not events_dict_path.is_file(): + return None + + return BidsJsonFile(events_dict_path) + @cached_property def subject_labels(self) -> list[str]: """ @@ -208,9 +229,42 @@ class BidsSessionReader: """ @cached_property - def data_types(self) -> list['BidsDataTypeReader']: + def scans_file(self) -> BidsScansTsvFile | None: + scans_paths: list[str] = self.subject.dataset.layout.get( # type: ignore + subject=self.subject.label, + session=self.label, + suffix='scans', + return_type='filename', + ) + + if scans_paths == []: + return None + + return BidsScansTsvFile(Path(scans_paths[0])) + + @cached_property + def mri_data_types(self) -> list['BidsMriDataTypeReader']: + """ + Get the MRI data type directory readers of this session. 
+ """ + + from loris_bids_reader.mri.reader import BidsMriDataTypeReader + + return [ + BidsMriDataTypeReader( + session=self, + name=data_type, # type: ignore + ) for data_type in self.subject.dataset.layout.get_datatypes( # type: ignore + subject=self.subject.label, + session=self.label, + datatype=['anat', 'dwi', 'fmap', 'func'], + ) + ] + + @cached_property + def eeg_data_types(self) -> list['BidsDataTypeReader']: """ - Get the data type directory readers of this session. + Get the EEG data type directory readers of this session. """ return [ @@ -220,9 +274,43 @@ def data_types(self) -> list['BidsDataTypeReader']: ) for data_type in self.subject.dataset.layout.get_datatypes( # type: ignore subject=self.subject.label, session=self.label, + datatype=['eeg', 'ieeg'], + ) + ] + + @cached_property + def meg_data_types(self) -> list['BidsMegDataTypeReader']: + """ + Get the MEG data type directory readers of this session. + """ + + from loris_bids_reader.meg.reader import BidsMegDataTypeReader + + return [ + BidsMegDataTypeReader( + session=self, + name=data_type, # type: ignore + path=( + self.subject.dataset.path + / f'sub-{self.subject.label}' + / (f'ses-{self.label}' if self.label is not None else '') + / data_type # type: ignore + ), + ) for data_type in self.subject.dataset.layout.get_datatypes( # type: ignore + subject=self.subject.label, + session=self.label, + datatype=['meg'], ) ] + @cached_property + def data_types(self) -> Sequence['BidsDataTypeReader']: + """ + Get all the data type directory readers of this session. 
+ """ + + return self.eeg_data_types + self.meg_data_types + self.mri_data_types + @cached_property def info(self) -> BidsSessionInfo: """ @@ -233,6 +321,7 @@ def info(self) -> BidsSessionInfo: subject = self.subject.label, participant_row = self.subject.participant_row, session = self.label, + scans_file = self.scans_file, ) @@ -262,5 +351,6 @@ def info(self) -> BidsDataTypeInfo: subject = self.session.subject.label, participant_row = self.session.subject.participant_row, session = self.session.label, + scans_file = self.session.scans_file, data_type = self.name, ) diff --git a/python/loris_bids_reader/src/loris_bids_reader/tsv.py b/python/loris_bids_reader/src/loris_bids_reader/tsv.py index d2e500e47..eecbb8de1 100644 --- a/python/loris_bids_reader/src/loris_bids_reader/tsv.py +++ b/python/loris_bids_reader/src/loris_bids_reader/tsv.py @@ -2,6 +2,10 @@ from pathlib import Path from typing import Any, Generic, TypeVar +from loris_utils.path import replace_path_extension + +from loris_bids_reader.json import BidsJsonFile + class BidsTsvRow: """ @@ -25,13 +29,50 @@ class BidsTsvFile(Generic[T]): """ path: Path + dictionary: BidsJsonFile | None rows: list[T] def __init__(self, model: type[T], path: Path): self.path = path self.rows = [] + dictionary_path = replace_path_extension(self.path, 'json') + if dictionary_path.exists(): + self.dictionary = BidsJsonFile(dictionary_path) + else: + self.dictionary = None + with open(self.path, encoding='utf-8-sig') as file: reader = csv.DictReader(file, delimiter='\t') for row in reader: self.rows.append(model(row)) + + def get_field_names(self) -> list[str]: + """ + Get the names of the fields of this file. + """ + + fields: list[str] = [] + for row in self.rows: + for field in row.data.keys(): + if field not in fields: + fields.append(field) + + return fields + + def write(self, path: Path, fields: list[str] | None = None): + """ + Write the TSV file to a path, writing either given fields, or the populated fields by + default. 
+ """ + + if fields is None: + fields = self.get_field_names() + + with open(path, 'w', newline='') as file: + writer = csv.DictWriter(file, fieldnames=fields, delimiter='\t') + writer.writeheader() + + for row in self.rows: + filtered_row = {field: row.data[field] if field in row.data else None for field in fields} + writer.writerow(filtered_row) diff --git a/python/loris_bids_reader/src/loris_bids_reader/utils.py b/python/loris_bids_reader/src/loris_bids_reader/utils.py new file mode 100644 index 000000000..d3aaed347 --- /dev/null +++ b/python/loris_bids_reader/src/loris_bids_reader/utils.py @@ -0,0 +1,41 @@ +from collections.abc import Callable +from pathlib import Path +from typing import Any + +from bids import BIDSLayout +from bids.layout import BIDSFile +from loris_utils.iter import find + + +def try_get_pybids_value(layout: BIDSLayout, **args: Any) -> Any | None: + """ + Get zero or one PyBIDS value using the provided arguments, or raise an exception if multiple + values are found. + """ + + match layout.get(**args): # type: ignore + case []: + return None + case [value]: # type: ignore + return value # type: ignore + case values: # type: ignore + raise Exception(f"Expected one or zero PyBIDS value but found {len(values)}.") # type: ignore + + +def get_pybids_file_path(file: BIDSFile) -> Path: + """ + Get the path of a PyBIDS file. + """ + + # The PyBIDS file class does not use the standard path object nor supports type checking. + return Path(file.path) # type: ignore + + +def find_pybids_file_path(files: list[BIDSFile], predicate: Callable[[BIDSFile], bool]) -> Path | None: + """ + Find the path of a file in a list of PyBIDS files using a predicate, or return `None` if no + file matches the predicate. 
+ """ + + file = find(files, predicate) + return get_pybids_file_path(file) if file is not None else None diff --git a/python/loris_eeg_chunker/src/loris_eeg_chunker/scripts/eeglab_to_chunks.py b/python/loris_eeg_chunker/src/loris_eeg_chunker/scripts/eeglab_to_chunks.py index bff2f78e0..8061e8f78 100755 --- a/python/loris_eeg_chunker/src/loris_eeg_chunker/scripts/eeglab_to_chunks.py +++ b/python/loris_eeg_chunker/src/loris_eeg_chunker/scripts/eeglab_to_chunks.py @@ -39,7 +39,6 @@ def main(): eeg = mne_eeglab._check_load_mat(path, None) # type: ignore eeglab_info = mne_eeglab._get_info(eeg, eog=(), montage_units="auto") # type: ignore channel_names = cast(list[str], eeglab_info[0]['ch_names']) - if args.channel_index < 0: sys.exit("Channel index must be a positive integer") diff --git a/python/loris_ephys_server/README.md b/python/loris_ephys_server/README.md new file mode 100644 index 000000000..172e789bc --- /dev/null +++ b/python/loris_ephys_server/README.md @@ -0,0 +1,3 @@ +# LORIS electrophysiology server + +The LORIS electrophysiology server. 
diff --git a/python/loris_ephys_server/pyproject.toml b/python/loris_ephys_server/pyproject.toml new file mode 100644 index 000000000..24e7247c3 --- /dev/null +++ b/python/loris_ephys_server/pyproject.toml @@ -0,0 +1,27 @@ +[project] +name = "loris-ephys-server" +version = "27.0.0" +description = "LORIS electrophysiology server" +readme = "README.md" +requires-python = ">= 3.11" +dependencies = [ + "fastapi", + "mne", + "mne-bids", + "numpy", + "uvicorn[standard]", +] + +[project.entry-points."loris_server.loaders"] +loris-ephys-server = "loris_ephys_server.main:load" + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["src/loris_ephys_server"] + +[tool.ruff] +extend = "../../pyproject.toml" +src = ["src"] diff --git a/python/loris_ephys_server/src/loris_ephys_server/endpoints/meg_head_shape.py b/python/loris_ephys_server/src/loris_ephys_server/endpoints/meg_head_shape.py new file mode 100644 index 000000000..8bb311e67 --- /dev/null +++ b/python/loris_ephys_server/src/loris_ephys_server/endpoints/meg_head_shape.py @@ -0,0 +1,47 @@ +from fastapi import HTTPException +from lib.config import get_data_dir_path_config +from lib.db.queries.physio_file import try_get_physio_file_with_id +from lib.env import Env +from loris_bids_reader.meg.head_shape import MegCtfHeadShapeFile +from pydantic import BaseModel + + +class MegHeadShapePoint(BaseModel): + x: float + y: float + z: float + + +class MegHeadShapeResponse(BaseModel): + points: dict[str, MegHeadShapePoint] + + +def get_meg_head_shape(env: Env, physio_file_id: int): + """ + Get the head shape points of a LORIS MEG file. 
+ """ + + physio_file = try_get_physio_file_with_id(env.db, physio_file_id) + if physio_file is None: + raise HTTPException(status_code=404, detail="Electrophysiology file not found.") + + if physio_file.type != 'ctf': + raise HTTPException(status_code=404, detail="Electrophysiology file is not an MEG file.") + + if physio_file.head_shape_file is None: + raise HTTPException(status_code=404, detail="Headshape file not found.") + + data_dir_path = get_data_dir_path_config(env) + + head_shape_path = data_dir_path / physio_file.head_shape_file.path + head_shape_file = MegCtfHeadShapeFile(head_shape_path) + + points: dict[str, MegHeadShapePoint] = {} + for name, point in head_shape_file.points.items(): + points[name] = MegHeadShapePoint( + x = float(point.x) / 100, + y = float(point.y) / 100, + z = float(point.z) / 100, + ) + + return MegHeadShapeResponse(points=points) diff --git a/python/loris_ephys_server/src/loris_ephys_server/endpoints/meg_sensors.py b/python/loris_ephys_server/src/loris_ephys_server/endpoints/meg_sensors.py new file mode 100644 index 000000000..923e8e55c --- /dev/null +++ b/python/loris_ephys_server/src/loris_ephys_server/endpoints/meg_sensors.py @@ -0,0 +1,63 @@ +import mne.io +import numpy as np +from fastapi import HTTPException +from lib.config import get_data_dir_path_config +from lib.db.queries.physio_file import try_get_physio_file_with_id +from lib.env import Env +from pydantic import BaseModel + + +class MegSensorPoint(BaseModel): + x: float + y: float + z: float + + +class MegSensorsResponse(BaseModel): + sensors: dict[str, MegSensorPoint] + + +def get_meg_sensors(env: Env, physio_file_id: int) -> MegSensorsResponse: + """ + Get the head MEG sensors of a LORIS MEG file. 
+ """ + + physio_file = try_get_physio_file_with_id(env.db, physio_file_id) + if physio_file is None: + raise HTTPException(status_code=404, detail="Electrophysiology file not found.") + + if physio_file.type != 'ctf': + raise HTTPException(status_code=404, detail="Electrophysiology file is not an MEG file.") + + data_dir_path = get_data_dir_path_config(env) + + raw = mne.io.read_raw_ctf(data_dir_path / physio_file.path) # type: ignore + + # Get the transformation from the device to the head coordinates system. + dev_head_t = raw.info.get('dev_head_t') # type: ignore + if dev_head_t is None: + raise HTTPException(status_code=500, detail="No device-to-head transformation found in the CTF file.") + + # The transformation matrix is a 4x4 array. + transform = dev_head_t['trans'] # type: ignore + + sensors: dict[str, MegSensorPoint] = {} + for channel in raw.info["chs"]: # type: ignore + # Sensor position in device coordinates (meters) + device_pos = np.array([ + float(channel['loc'][0]), # type: ignore + float(channel['loc'][1]), # type: ignore + float(channel['loc'][2]), # type: ignore + 1.0 # Homogeneous coordinates + ]) + + # Transform to head coordinates + head_pos = transform @ device_pos # type: ignore + + sensors[channel['ch_name']] = MegSensorPoint( + x = float(head_pos[0]), # type: ignore + y = float(head_pos[1]), # type: ignore + z = float(head_pos[2]), # type: ignore + ) + + return MegSensorsResponse(sensors=sensors) diff --git a/python/loris_ephys_server/src/loris_ephys_server/jsonize.py b/python/loris_ephys_server/src/loris_ephys_server/jsonize.py new file mode 100644 index 000000000..08868c0cb --- /dev/null +++ b/python/loris_ephys_server/src/loris_ephys_server/jsonize.py @@ -0,0 +1,71 @@ +import math +import uuid +from datetime import date, datetime +from decimal import Decimal +from typing import Any + +import numpy as np + +JsonPrimitive = str | int | float | bool | None +JsonValue = JsonPrimitive | dict[str, 'JsonValue'] | list['JsonValue'] + + +def 
jsonize(value: Any) -> JsonValue: + """ + Recursively convert a value to a JSON-like value. + """ + + if value is None or isinstance(value, (str, int, bool)): + return value + + # Handle float special cases + if isinstance(value, float): + if math.isinf(value) or math.isnan(value): + return str(value) + return value + + # Handle numpy types + if isinstance(value, np.ndarray): + if value.dtype.kind == 'f': # type: ignore + if np.any(np.isinf(value)) or np.any(np.isnan(value)): # type: ignore + return [ + str(x) + if (isinstance(x, float) and (math.isinf(x) or math.isnan(x))) else jsonize(x) + for x in value.tolist() + ] + + return value.tolist() + + if isinstance(value, np.integer): + return int(value) # type: ignore + + if isinstance(value, np.floating): + if np.isinf(value) or np.isnan(value): # type: ignore + return str(value) # type: ignore + + return float(value) # type: ignore + + if isinstance(value, np.bool_): + return bool(value) # type: ignore + + # Handle datetime/dates + if isinstance(value, (datetime, date)): + return value.isoformat() + + # Handle Decimal + if isinstance(value, Decimal): + return float(value) + + # Handle UUID + if isinstance(value, uuid.UUID): + return str(value) + + # Handle iterables (list, tuple, set) + if isinstance(value, (list, tuple, set)): + return [jsonize(item) for item in value] # type: ignore + + # Handle dictionaries + if isinstance(value, dict): + return {str(k): jsonize(v) for k, v in value.items()} # type: ignore + + raise Exception(value) diff --git a/python/loris_ephys_server/src/loris_ephys_server/main.py b/python/loris_ephys_server/src/loris_ephys_server/main.py new file mode 100644 index 000000000..ffa64f53b --- /dev/null +++ b/python/loris_ephys_server/src/loris_ephys_server/main.py @@ -0,0 +1,21 @@ +from fastapi import APIRouter, FastAPI +from loris_server.dependencies import EnvDep + +from loris_ephys_server.endpoints.meg_head_shape import MegHeadShapeResponse, get_meg_head_shape +from 
from fastapi import APIRouter, FastAPI
from loris_server.dependencies import EnvDep

from loris_ephys_server.endpoints.meg_head_shape import MegHeadShapeResponse, get_meg_head_shape
from loris_ephys_server.endpoints.meg_sensors import MegSensorsResponse, get_meg_sensors

# Router grouping all the electrophysiology endpoints under a common prefix.
router = APIRouter(prefix='/ephys')


def meg_sensors(physio_file_id: int, env: EnvDep):
    return get_meg_sensors(env, physio_file_id)


def meg_head_shape(physio_file_id: int, env: EnvDep):
    return get_meg_head_shape(env, physio_file_id)


# Register the endpoint handlers on the router.
router.add_api_route('/meg/{physio_file_id}/sensors', meg_sensors, methods=['GET'], response_model=MegSensorsResponse)
router.add_api_route(
    '/meg/{physio_file_id}/headshape', meg_head_shape, methods=['GET'], response_model=MegHeadShapeResponse
)


def load(api: FastAPI):
    """
    Register the electrophysiology endpoints into the LORIS server API.
    """

    return api.include_router(router)
diff --git a/python/loris_server/pyproject.toml b/python/loris_server/pyproject.toml new file mode 100644 index 000000000..13080b509 --- /dev/null +++ b/python/loris_server/pyproject.toml @@ -0,0 +1,24 @@ +[project] +name = "loris-server" +version = "27.0.0" +description = "LORIS server" +readme = "README.md" +requires-python = ">= 3.11" +dependencies = [ + "fastapi", + "uvicorn[standard]", +] + +[project.scripts] +run-loris-server = "loris_server.cli.run_loris_server:main" + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["src/loris_server"] + +[tool.ruff] +extend = "../../pyproject.toml" +src = ["src"] diff --git a/python/loris_server/src/loris_server/api.py b/python/loris_server/src/loris_server/api.py new file mode 100644 index 000000000..0931a9793 --- /dev/null +++ b/python/loris_server/src/loris_server/api.py @@ -0,0 +1,28 @@ +import os +from importlib.metadata import entry_points + +from fastapi import FastAPI +from lib.config_file import load_config + +from loris_server.endpoints.health import health + +# Get the LORIS configuration values from the environment. +config_file_name = os.environ.get('LORIS_CONFIG_FILE') +dev_mode = os.environ.get('LORIS_DEV_MODE') == 'true' + +# Load the LORIS configuration. +config = load_config(config_file_name) + +# Create the API object. +api = FastAPI(title="LORIS server", debug=dev_mode) + +# Attach the LORIS configuration to the API state. +api.state.config = config + +# Add the health check route to the API. +api.add_api_route('/health', health, methods=['GET']) + +# Load the modules registered into the LORIS server. 
import argparse
import os

import uvicorn


def main():
    """
    Parse the CLI arguments and start the LORIS server with Uvicorn.
    """

    parser = argparse.ArgumentParser(description="Start the LORIS server")
    parser.add_argument('--config', help='Name of the LORIS configuration file')
    parser.add_argument('--dev', action='store_true', help="Run in development mode with hot reload")
    parser.add_argument('--host', default='127.0.0.1', help="Host to bind to (default: 127.0.0.1)")
    parser.add_argument('--port', type=int, default=8000, help="Port to bind to (default: 8000)")
    args = parser.parse_args()

    # The configuration is handed to the server through environment variables since Uvicorn loads
    # the application from a module path, not from this process's scope.
    if args.config is not None:
        os.environ['LORIS_CONFIG_FILE'] = args.config

    if args.dev:
        os.environ['LORIS_DEV_MODE'] = 'true'

    uvicorn.run(
        'loris_server.api:api',
        host=args.host,
        port=args.port,
        reload=args.dev,
        log_level='debug' if args.dev else 'info',
    )


if __name__ == '__main__':
    main()
import tarfile
from pathlib import Path


def create_archive_with_file(archive_path: Path, file_path: Path):
    """
    Create a tar archive with the provided file.
    """

    # Delegate to the multi-file variant instead of duplicating the archive-creation logic.
    create_archive_with_files(archive_path, [file_path])


def create_archive_with_files(archive_path: Path, file_paths: list[Path]):
    """
    Create a tar archive with the provided files. Files are added to the archive using their base
    name, so the name of the provided files should all be distinct.
    """

    with tarfile.open(archive_path, 'w:gz') as tar:
        for file_path in file_paths:
            tar.add(file_path, arcname=file_path.name)
def compute_file_blake2b_hash(file_path: Path) -> str:
    """
    Compute the BLAKE2b hash of a file.
    """

    hasher = blake2b()
    update_file_blake2b_hash(Path(file_path), hasher)
    return hasher.hexdigest()


def compute_directory_blake2b_hash(dir_path: Path) -> str:
    """
    Compute the BLAKE2b hash of a directory.
    """

    hasher = blake2b()
    update_directory_blake2b_hash(dir_path, hasher)
    return hasher.hexdigest()


def update_file_blake2b_hash(file_path: Path, hash: blake2b):
    """
    Update a BLAKE2b hash with the contents of a file.
    """

    # The file may be large, so it is read in fixed-size (1 MiB) chunks to keep memory usage
    # bounded.
    with open(file_path, 'rb') as file:
        while chunk := file.read(1048576):
            hash.update(chunk)


def update_directory_blake2b_hash(dir_path: Path, hash: blake2b):
    """
    Update a BLAKE2b hash with the contents of a directory.
    """

    # The entries are sorted so that the hash does not depend on the file system iteration order.
    for entry in sorted(dir_path.iterdir()):
        # The entry name is hashed as well so that the directory structure is reflected in the
        # hash.
        hash.update(entry.name.encode())
        # Symlinks are currently not included in the hash.
        if entry.is_file():
            update_file_blake2b_hash(entry, hash)
        elif entry.is_dir():
            update_directory_blake2b_hash(entry, hash)
+ if path.is_file(): + update_file_blake2b_hash(path, hash) + elif path.is_dir(): + update_directory_blake2b_hash(path, hash) def compute_file_md5_hash(file_path: Path | str) -> str: diff --git a/python/scripts/bids_import.py b/python/scripts/bids_import.py deleted file mode 100755 index 68c0a21a2..000000000 --- a/python/scripts/bids_import.py +++ /dev/null @@ -1,477 +0,0 @@ -#!/usr/bin/env python - -"""Script to import BIDS structure into LORIS.""" - -import getopt -import json -import os -import re -import sys -from pathlib import Path - -from loris_bids_reader.files.participants import BidsParticipantsTsvFile -from loris_bids_reader.reader import BidsDatasetReader -from loris_utils.crypto import compute_file_blake2b_hash - -import lib.exitcode -import lib.physiological -import lib.utilities -from lib.candidate import Candidate -from lib.config import get_default_bids_visit_label_config -from lib.config_file import load_config -from lib.database import Database -from lib.database_lib.config import Config -from lib.db.queries.candidate import try_get_candidate_with_cand_id, try_get_candidate_with_psc_id -from lib.db.queries.session import try_get_session_with_cand_id_visit_label -from lib.eeg import Eeg -from lib.env import Env -from lib.import_bids_dataset.check_sessions import check_or_create_bids_sessions -from lib.import_bids_dataset.check_subjects import check_or_create_bids_subjects -from lib.import_bids_dataset.env import BidsImportEnv -from lib.make_env import make_env -from lib.mri import Mri - - -def main(): - bids_dir = '' - verbose = False - createcand = False - createvisit = False - idsvalidation = False - nobidsvalidation = False - type = None - profile = None - nocopy = False - - long_options = [ - "help", "profile=", "directory=", - "createcandidate", "createsession", "idsvalidation", - "nobidsvalidation", "nocopy", "type=", - "verbose" - ] - usage = ( - '\n' - 'usage : bids_import -d -p \n\n' - 'options: \n' - '\t-p, --profile : name of the python 
database config file in the config directory\n' - '\t-d, --directory : BIDS directory to parse & insert into LORIS\n' - 'If directory is within $data_dir/assembly_bids, no copy will be performed' - '\t-c, --createcandidate : to create BIDS candidates in LORIS (optional)\n' - '\t-s, --createsession : to create BIDS sessions in LORIS (optional)\n' - '\t-i, --idsvalidation : to validate BIDS directory for a matching pscid/candid pair (optional)\n' - '\t-b, --nobidsvalidation : to disable BIDS validation for BIDS compliance\n' - '\t-a, --nocopy : to disable dataset copy in data assembly_bids\n' - '\t-t, --type : raw | derivative. Specify the dataset type.' - 'If not set, the pipeline will look for both raw and derivative files.\n' - 'Required if no dataset_description.json is found.\n' - '\t-v, --verbose : be verbose\n' - ) - - try: - opts, _ = getopt.getopt(sys.argv[1:], 'hp:d:csinat:v', long_options) - except getopt.GetoptError: - print(usage) - sys.exit(lib.exitcode.GETOPT_FAILURE) - - for opt, arg in opts: - if opt in ('-h', '--help'): - print(usage) - sys.exit() - elif opt in ('-p', '--profile'): - profile = arg - elif opt in ('-d', '--directory'): - bids_dir = arg - elif opt in ('-v', '--verbose'): - verbose = True - elif opt in ('-c', '--createcandidate'): - createcand = True - elif opt in ('-s', '--createsession'): - createvisit = True - elif opt in ('-i', '--idsvalidation'): - idsvalidation = True - elif opt in ('-n', '--nobidsvalidation'): - nobidsvalidation = True - elif opt in ('-a', '--nocopy'): - nocopy = True - elif opt in ('-t', '--type'): - type = arg - - # input error checking and load config_file file - config_file = load_config(profile) - input_error_checking(bids_dir, usage) - tmp_dir_path = lib.utilities.create_processing_tmp_dir('mass_nifti_pic') - env = make_env('bids_import', {}, config_file, tmp_dir_path, verbose) - - dataset_json = bids_dir + "/dataset_description.json" - if not os.path.isfile(dataset_json) and not type: - print('No 
dataset_description.json found. Please run with the --type option.') - print(usage) - sys.exit(lib.exitcode.MISSING_ARG) - - if type and type not in ('raw', 'derivative'): - print("--type must be one of 'raw', 'derivative'") - print(usage) - sys.exit(lib.exitcode.MISSING_ARG) - - # database connection - db = Database(config_file.mysql, verbose) - db.connect() - - config_obj = Config(db, verbose) - data_dir = config_obj.get_config('dataDirBasepath') - # making sure that there is a final / in data_dir - data_dir = data_dir if data_dir.endswith('/') else data_dir + "/" - - # read and insert BIDS data - read_and_insert_bids( - env, - bids_dir, - data_dir, - verbose, - createcand, - createvisit, - idsvalidation, - nobidsvalidation, - type, - nocopy, - db - ) - - -def input_error_checking(bids_dir, usage): - """ - Checks whether the required inputs are set and that paths are valid. - - :param bids_dir: path to the BIDS directory to parse and insert into LORIS - :type bids_dir: str - :param usage : script usage to be displayed when encountering an error - :type usage : st - """ - - if not bids_dir: - message = '\n\tERROR: you must specify a BIDS directory using -d or ' \ - '--directory option' - print(message) - print(usage) - sys.exit(lib.exitcode.MISSING_ARG) - - if not os.path.isdir(bids_dir): - message = '\n\tERROR: you must specify a valid BIDS directory.\n' + \ - bids_dir + ' does not exist!' - print(message) - print(usage) - sys.exit(lib.exitcode.INVALID_PATH) - - -def read_and_insert_bids( - env: Env, bids_dir, data_dir, verbose, createcand, createvisit, - idsvalidation, nobidsvalidation, type, nocopy, db -): - """ - Read the provided BIDS structure and import it into the database. 
- - :param bids_dir : path to the BIDS directory - :type bids_dir : str - :param data_dir : data_dir config value - :type data_dir : string - :param verbose : flag for more printing if set - :type verbose : bool - :param createcand : allow database candidate creation if it did not exist already - :type createcand : bool - :param createvisit : allow database visit creation if it did not exist already - :type createvisit : bool - :param idsvalidation : allow pscid/candid validation in the BIDS directory name - :type idsvalidation : bool - :param nobidsvalidation : disable bids dataset validation - :type nobidsvalidation : bool - :param type : raw | derivative. Type of the dataset - :type type : string - :param nocopy : disable bids dataset copy in assembly_bids - :type nocopy : bool - :param db : db object - :type db : object - - """ - - # grep config settings from the Config module - default_bids_vl = get_default_bids_visit_label_config(env) - - # Validate that pscid and candid matches - if idsvalidation: - validateids(bids_dir, db, verbose) - - # load the BIDS directory - bids_reader = BidsDatasetReader(Path(bids_dir), not nobidsvalidation) - - if bids_reader.data_types == []: - print(f"Could not read the BIDS directory '{bids_dir}'.") - sys.exit(lib.exitcode.UNREADABLE_FILE) - - print("List of subjects found in the BIDS dataset:") - for subject_label in bids_reader.subject_labels: - print(f"- {subject_label}") - - print("List of sessions found in the BIDS dataset:") - for session_label in bids_reader.session_labels: - print(f"- {session_label}") - - if bids_reader.participants_file is not None: - validate_participants(bids_reader, bids_reader.participants_file) - - loris_bids_root_dir = None - if not nocopy: - # create the LORIS_BIDS directory in data_dir based on Name and BIDS version - loris_bids_root_dir = create_loris_bids_directory( - bids_reader, data_dir, verbose - ) - - check_or_create_bids_subjects( - env, - [subject.info for subject in 
bids_reader.subjects], - createcand, - ) - - sessions = check_or_create_bids_sessions( - env, - [session.info for session in bids_reader.sessions], - createvisit, - ) - - env.db.commit() - - # Assumption all same project (for project-wide tags) - single_project_id = sessions[0].project.id - - # Import root-level (dataset-wide) events.json - # Assumption: Single project for project-wide tags - bids_layout = bids_reader.layout - root_event_metadata_file = bids_layout.get_nearest( - bids_dir, - return_type='tuple', - strict=False, - extension='json', - suffix='events', - all_=False, - subject=None, - session=None - ) - - dataset_tag_dict = {} - if not root_event_metadata_file: - message = '\nWARNING: no events metadata files (events.json) in ' \ - 'root directory' - print(message) - else: - # copy the event file to the LORIS BIDS import directory - copy_file = str.replace( - root_event_metadata_file.path, - bids_layout.root, - "" - ).lstrip('/') - - if not nocopy: - event_metadata_path = loris_bids_root_dir + copy_file - lib.utilities.copy_file(root_event_metadata_file.path, event_metadata_path, verbose) - - # TODO: Move - hed_query = 'SELECT * FROM hed_schema_nodes WHERE 1' - hed_union = db.pselect(query=hed_query, args=()) - - # load json data - with open(root_event_metadata_file.path) as metadata_file: - event_metadata = json.load(metadata_file) - blake2 = compute_file_blake2b_hash(root_event_metadata_file.path) - physio = lib.physiological.Physiological(env, db, verbose) - _, dataset_tag_dict = physio.insert_event_metadata( - event_metadata=event_metadata, - event_metadata_file=event_metadata_path, - physiological_file=None, - project_id=single_project_id, - blake2=blake2, - project_wide=True, - hed_union=hed_union - ) - - import_env = BidsImportEnv( - data_dir_path = Path(data_dir), - source_bids_path = Path(bids_dir), - loris_bids_path = Path(loris_bids_root_dir).relative_to(data_dir) if loris_bids_root_dir is not None else None, - ) - - # read list of 
modalities per session / candidate and register data - for data_type_reader in bids_reader.data_types: - bids_info = data_type_reader.info - visit_label = bids_info.session if bids_info.session is not None else default_bids_vl - loris_bids_data_type_rel_dir = os.path.join( - f'sub-{bids_info.subject}', - f'ses-{visit_label}', - bids_info.data_type, - ) - - if not nocopy: - lib.utilities.create_dir(os.path.join(loris_bids_root_dir, loris_bids_data_type_rel_dir), verbose) - - candidate = try_get_candidate_with_cand_id(env.db, bids_info.subject) - if candidate is None: - candidate = try_get_candidate_with_psc_id(env.db, bids_info.subject) - - session = try_get_session_with_cand_id_visit_label(env.db, candidate.cand_id, visit_label) - - match bids_info.data_type: - case 'eeg' | 'ieeg': - Eeg( - env, - import_env, - bids_layout = bids_reader.layout, - session = session, - bids_info = bids_info, - db = db, - dataset_tag_dict = dataset_tag_dict, - dataset_type = type - ) - case 'anat' | 'dwi' | 'fmap' | 'func': - Mri( - env, - bids_layout = bids_reader.layout, - session = session, - bids_sub_id = bids_info.subject, - bids_ses_id = bids_info.session, - bids_modality = bids_info.data_type, - db = db, - verbose = verbose, - data_dir = data_dir, - default_visit_label = default_bids_vl, - loris_bids_mri_rel_dir = loris_bids_data_type_rel_dir, - loris_bids_root_dir = loris_bids_root_dir - ) - case _: - print(f"Data type {bids_info.data_type} is not supported. 
Skipping.") - - # disconnect from the database - db.disconnect() - - -def validateids(bids_dir, db, verbose): - """ - Validate that pscid and candid matches - - :param bids_dir : path to the BIDS directory - :type bids_dir : str - :param db : database handler object - :type db : object - :param verbose : flag for more printing if set - :type verbose : bool - """ - - bids_folder = bids_dir.rstrip('/').split('/')[-1] - bids_folder_parts = bids_folder.split('_') - psc_id = bids_folder_parts[0] - cand_id = bids_folder_parts[1] - - candidate = Candidate(verbose, cand_id=cand_id) - loris_cand_info = candidate.get_candidate_info_from_loris(db) - - if not loris_cand_info: - print("ERROR: could not find a candidate with cand_id " + cand_id + ".") - sys.exit(lib.exitcode.CANDID_NOT_FOUND) - if loris_cand_info['PSCID'] != psc_id: - print("ERROR: cand_id " + cand_id + " and psc_id " + psc_id + " do not match.") - sys.exit(lib.exitcode.CANDIDATE_MISMATCH) - - -def create_loris_bids_directory(bids_reader: BidsDatasetReader, data_dir, verbose): - """ - Creates the LORIS BIDS import root directory (with name and BIDS version) - and copy over the dataset_description.json, README and participants.tsv - files. 
- - :param bids_reader: BIDS information handler object - :type bids_reader: object - :param data_dir : path of the LORIS data directory - :type data_dir : str - :param verbose : if true, prints out information while executing - :type verbose : bool - - :return: path to the LORIS BIDS import root directory - :rtype: str - """ - - if bids_reader.dataset_description_file is None: - print("ERROR: Could not read BIDS dataset description.") - sys.exit(lib.exitcode.UNREADABLE_FILE) - - # determine the root directory of the LORIS BIDS and create it if does not exist - name = re.sub(r"[^0-9a-zA-Z]+", "_", bids_reader.dataset_description_file.data['Name']) - version = re.sub(r"[^0-9a-zA-Z\.]+", "_", bids_reader.dataset_description_file.data['BIDSVersion']) - - # the LORIS BIDS directory will be in data_dir/BIDS/ and named with the - # concatenation of the dataset name and the BIDS version - loris_bids_dirname = lib.utilities.create_dir( - os.path.join(data_dir, 'bids_imports', f'{name}_BIDSVersion_{version}'), - verbose - ) - - # copy the dataset JSON file to the new directory - lib.utilities.copy_file( - bids_reader.path / "dataset_description.json", - os.path.join(loris_bids_dirname, "dataset_description.json"), - verbose - ) - - # copy the README file to the new directory - if os.path.isfile(bids_reader.path / "README"): - lib.utilities.copy_file( - bids_reader.path / "README", - os.path.join(loris_bids_dirname, "README"), - verbose - ) - - # copy the participant.tsv file to the new directory - if os.path.exists(os.path.join(loris_bids_dirname, "participants.tsv")): - lib.utilities.append_to_tsv_file( - bids_reader.path / "participants.tsv", - os.path.join(loris_bids_dirname, "participants.tsv"), - "participant_id", - verbose - ) - else: - lib.utilities.copy_file( - bids_reader.path / "participants.tsv", - os.path.join(loris_bids_dirname, "participants.tsv"), - verbose - ) - - return loris_bids_dirname - - -def validate_participants(bids_reader: BidsDatasetReader, 
def pack_args(options_dict: dict[str, Any]) -> Args:
    """
    Convert the parsed CLI options dictionary into a typed arguments object.
    """

    def option(name: str) -> Any:
        # Shortcut to read a single option value from the options dictionary.
        return options_dict[name]['value']

    return Args(
        source_bids_path = Path(option('directory')),
        type             = option('type'),
        bids_validation  = not option('nobidsvalidation'),
        create_candidate = option('createcandidate'),
        create_session   = option('createsession'),
        copy             = not option('nocopy'),
        verbose          = option('verbose'),
    )
def main():
    """
    Entry point of the BIDS dataset import script.
    """

    usage = (
        "\n"
        "usage : bids_import -d -p \n"
        "\n"
        "options: \n"
        "\t-p, --profile : name of the python database config file in dicom-archive/.loris-mri\n"
        "\t-d, --directory : BIDS directory to parse & insert into LORIS\n"
        "\t If directory is within $data_dir/assembly_bids, no copy will be performed\n"
        "\t-c, --createcandidate : to create BIDS candidates in LORIS (optional)\n"
        "\t-s, --createsession : to create BIDS sessions in LORIS (optional)\n"
        "\t-b, --nobidsvalidation : to disable BIDS validation for BIDS compliance\n"
        "\t-a, --nocopy : to disable dataset copy in data assembly_bids\n"
        "\t-t, --type : raw | derivative. Specify the dataset type.\n"
        "\t If not set, the pipeline will look for both raw and derivative files.\n"
        "\t Required if no dataset_description.json is found.\n"
        "\t-v, --verbose : be verbose\n"
    )

    def flag_option(short_opt: str) -> dict[str, Any]:
        # Boolean CLI flag that defaults to `False` and takes no argument.
        return {"value": False, "required": False, "expect_arg": False, "short_opt": short_opt, "is_path": False}

    options_dict = {
        "profile": {
            "value": None, "required": False, "expect_arg": True, "short_opt": "p", "is_path": False
        },
        "directory": {
            "value": None, "required": True, "expect_arg": True, "short_opt": "d", "is_path": True
        },
        "createcandidate": flag_option("c"),
        "createsession": flag_option("s"),
        "nobidsvalidation": flag_option("b"),
        "nocopy": flag_option("a"),
        "type": {
            "value": None, "required": False, "expect_arg": True, "short_opt": "t", "is_path": False
        },
        "verbose": flag_option("v"),
        "help": flag_option("h"),
    }

    # Get the CLI arguments and initiate the environment.

    loris_getopt_obj = LorisGetOpt(usage, options_dict, 'import_bids_dataset')

    env = make_env_from_opts(loris_getopt_obj)

    # Check the CLI arguments.

    type = loris_getopt_obj.options_dict['type']['value']
    if type not in (None, 'raw', 'derivative'):
        log_error_exit(
            env,
            f"--type must be one of 'raw', 'derivative'\n{usage}",
            lib.exitcode.MISSING_ARG,
        )

    args = pack_args(loris_getopt_obj.options_dict)

    # Read and insert the BIDS dataset.
    import_bids_dataset(
        env,
        args,
        loris_getopt_obj.db,
    )

    print("Success !")


if __name__ == '__main__':
    main()
database. file_parameters = get_physio_file_parameters_dict(db, file.id)