# Source code for msqms.io.raw_dataset
# -*- coding: utf-8 -*-
"""
Read Raw datasets
DatasetFormat:
Format 1 (Subject directories):
- datasets dir/
- sub01/ *.fif
- sub02/ *.fif
...
- sub03/ *.fif
or Format 2 (Single directory):
- datasets dir/
- *.fif
Find all raw data files (e.g. *.fif) and return the list of their file paths.
"""
import logging
import os
# [docs]
_logger = logging.getLogger(__name__)


def _list_subject_dirs(dataset_dir, file_suffix):
    """Return the sorted names of the subject subdirectories of ``dataset_dir``.

    A directory whose own name ends with ``file_suffix`` is NOT a subject
    directory: CTF recordings (``*.ds``) are directories themselves, so such
    entries are raw data, not containers of raw data.
    """
    return sorted(
        name
        for name in os.listdir(dataset_dir)
        if os.path.isdir(os.path.join(dataset_dir, name))
        # Guard against an empty suffix, which every name "ends with".
        and not (file_suffix and name.endswith(file_suffix))
    )


def read_raw_dataset(dataset_dir, file_suffix='.fif', dataset_format=None):
    """
    Find the paths of raw data files in a dataset directory.

    Entries whose name ends with ``file_suffix`` are collected either from
    per-subject subdirectories or directly from ``dataset_dir``. Both MNE
    files (``.fif``) and CTF recordings (``.ds``, which are directories) are
    supported. The dataset must follow one of two layouts::

        Format 1 (subject directories)      Format 2 (single directory)
        - datasets dir/                     - datasets dir/
            - sub01/                            - *.fif (or *.ds)
                - *.fif (or *.ds)
            - sub02/
                - *.fif (or *.ds)
            - ...

    Parameters
    ----------
    dataset_dir : str
        The path to the main directory containing raw data files. This can
        either be a directory containing subdirectories for each subject or
        just a collection of raw data files in the main directory.
    file_suffix : str, optional
        The suffix of the entries to find (default is '.fif'). Can be '.fif'
        for MNE datasets or '.ds' for CTF datasets.
    dataset_format : str, optional
        The format of the dataset. Can be one of:

        - 'format1': Dataset with subject subdirectories.
        - 'format2': Dataset with raw data files directly in the main
          directory.

        If not provided, the format is detected automatically: 'format1' if
        ``dataset_dir`` contains at least one subdirectory that is not itself
        a raw data entry (its name does not end with ``file_suffix``),
        otherwise 'format2'.

    Returns
    -------
    raw_list : list of str
        The paths of the raw data entries that were found. Subject
        directories and the entries inside each directory are visited in
        sorted name order, so the result is deterministic.

    Raises
    ------
    ValueError
        If ``dataset_dir`` is not an existing directory, if
        ``dataset_format`` is not one of the supported values, if a subject
        directory contains no entry with the expected suffix, or if no raw
        data entries are found at all.

    Example
    -------
    dataset_dir = "/path/to/dataset"
    raw_list = read_raw_dataset(dataset_dir, file_suffix='.fif', dataset_format='format1')
    # Print the file paths
    for path in raw_list:
        print(path)
    """
    # Check if the dataset directory exists
    if not os.path.isdir(dataset_dir):
        raise ValueError(f"The specified dataset directory {dataset_dir} is not valid.")

    # Automatically detect the dataset format if not provided
    subject_dirs = None
    if dataset_format is None:
        subject_dirs = _list_subject_dirs(dataset_dir, file_suffix)
        dataset_format = 'format1' if subject_dirs else 'format2'

    raw_list = []
    if dataset_format == 'format1':
        # dataset_dir contains one subdirectory per subject
        if subject_dirs is None:
            subject_dirs = _list_subject_dirs(dataset_dir, file_suffix)
        for subdir in subject_dirs:
            subdir_path = os.path.join(dataset_dir, subdir)
            _logger.debug("Checking subdirectory: %s", subdir_path)
            matches = [
                os.path.join(subdir_path, name)
                for name in sorted(os.listdir(subdir_path))
                if name.endswith(file_suffix)
            ]
            # Every subject directory must provide at least one raw entry
            if not matches:
                raise ValueError(f"No file with suffix {file_suffix} found in subject directory: {subdir}")
            raw_list.extend(matches)
    elif dataset_format == 'format2':
        # dataset_dir contains the raw data entries directly
        raw_list = [
            os.path.join(dataset_dir, name)
            for name in sorted(os.listdir(dataset_dir))
            if name.endswith(file_suffix)
        ]
    else:
        raise ValueError(f"Unsupported dataset format: {dataset_format}; Supported formats: 'format1', 'format2'")

    # If no raw data files were found, raise an error
    if not raw_list:
        raise ValueError(f"No raw data files found in {dataset_dir}")
    return raw_list
if __name__ == "__main__":
    # Manual smoke test: list the .fif files of a flat (format2) dataset
    # stored at a developer-local path and print one path per line.
    dataset_dir = r"C:\Data\Datasets\OPM-Artifacts"
    raw_list = read_raw_dataset(
        dataset_dir,
        file_suffix='.fif',
        dataset_format='format2',
    )
    # read_raw_dataset never returns an empty list (it raises instead), so
    # printing the unpacked list emits exactly one line per path.
    print(*raw_list, sep="\n")