Converting HCPh's physio into BIDS¶
This notebook shows how to convert from AcqKnowledge into BIDS. First, let's import some necessary libraries.
%matplotlib inline
from pathlib import Path
from json import dumps
import string
import numpy as np
from scipy.signal import fftconvolve
from matplotlib import pyplot as plt
import pandas as pd
from bioread import read_file
plt.rcParams["figure.figsize"] = (20, 2.5)
In the file schedule.tsv
, we will keep the lookup table of AcqKnowledge files generated and their corresponding session.
Some sessions (like 13) required restarting AcqKnowledge or some other issue and produced several runs as a consequence, we'll need to have special care with those.
biopac_lookup = pd.read_csv("schedule.tsv", sep='\t', na_values="n/a", dtype={"Run": "Int8"})
biopac_lookup
session | day | PE | AcqKnowledge | Run | |
---|---|---|---|---|---|
0 | 1 | 2023-10-20 | LR | session2023-10-20T18:06:05.acq | <NA> |
1 | 3 | 2023-10-21 | LR | session2023-10-21T09:07:18.acq | <NA> |
2 | 4 | 2023-10-21 | RL | session2023-10-21T11:12:24.acq | <NA> |
3 | 5 | 2023-10-22 | PA | session2023-10-22T09:36:48.acq | <NA> |
4 | 6 | 2023-10-22 | PA | session2023-10-22T11:22:53.acq | <NA> |
5 | 7 | 2023-10-23 | LR | session2023-10-23T19:31:18.acq | <NA> |
6 | 8 | 2023-10-23 | RL | session2023-10-23T21:18:42.acq | <NA> |
7 | 9 | 2023-10-24 | AP | session2023-10-24T19:24:56.acq | <NA> |
8 | 10 | 2023-10-24 | RL | session2023-10-24T21:24:40.acq | <NA> |
9 | 11 | 2023-10-25 | AP | session2023-10-25T20:20:54.acq | 1 |
10 | 11 | 2023-10-25 | AP | session2023-10-25T21:22:50.acq | 2 |
11 | 13 | 2023-10-26 | PA | session2023-10-26T19:16:00.acq | 1 |
12 | 13 | 2023-10-26 | PA | session2023-10-26T21:25:29.acq | 2 |
13 | 13 | 2023-10-26 | PA | session2023-10-26T21:44:06.acq | 3 |
14 | 15 | 2023-10-27 | AP | session2023-10-27T19:09:15.acq | <NA> |
15 | 16 | 2023-10-27 | RL | session2023-10-27T21:01:57.acq | <NA> |
16 | 17 | 2023-10-28 | PA | session2023-10-28T17:19:04.acq | <NA> |
17 | 18 | 2023-10-28 | LR | session2023-10-28T19:18:02.acq | <NA> |
18 | 19 | 2023-10-29 | PA | session2023-10-29T17:15:43.acq | <NA> |
19 | 20 | 2023-10-29 | RL | session2023-10-29T19:08:34.acq | <NA> |
20 | 21 | 2023-10-30 | RL | session2023-10-30T19:08:03.acq | <NA> |
21 | 22 | 2023-10-30 | AP | session2023-10-30T20:56:42.acq | <NA> |
22 | 23 | 2023-10-31 | AP | session2023-10-31T19:26:31.acq | <NA> |
23 | 24 | 2023-10-31 | AP | session2023-10-31T21:35:54.acq | <NA> |
24 | 25 | 2023-11-01 | RL | session2023-11-01T19:19:20.acq | <NA> |
25 | 26 | 2023-11-01 | PA | session2023-11-01T21:08:31.acq | <NA> |
26 | 28 | 2023-11-02 | RL | session2023-11-02T21:05:57.acq | <NA> |
27 | 37 | 2023-11-09 | LR | session2023-11-09T18:31:20.acq | <NA> |
28 | 38 | 2023-11-09 | PA | session2023-11-09T20:21:11.acq | <NA> |
29 | 39 | 2023-11-10 | LR | session2023-11-10T18:12:39.acq | <NA> |
30 | 40 | 2023-11-10 | AP | session2023-11-10T20:02:46.acq | <NA> |
31 | 41 | 2023-11-11 | AP | session2023-11-11T08:20:41.acq | <NA> |
32 | 42 | 2023-11-11 | PA | session2023-11-11T10:14:41.acq | <NA> |
33 | 43 | 2023-11-12 | AP | session2023-11-12T09:18:49.acq | <NA> |
34 | 44 | 2023-11-12 | LR | session2023-11-12T11:08:00.acq | <NA> |
35 | 45 | 2023-11-13 | LR | session2023-11-13T18:31:22.acq | <NA> |
36 | 46 | 2023-11-13 | LR | session2023-11-13T20:16:37.acq | <NA> |
37 | 47 | 2023-11-14 | RL | session2023-11-14T18:59:58.acq | <NA> |
38 | 48 | 2023-11-14 | PA | session2023-11-14T20:45:50.acq | <NA> |
Now, let's set some constants and extract the corresponding AcqKnowledge file.
Update the session
variable here to change what you are converting.
DATA_PATH = Path("/data/datasets/hcph-pilot-sourcedata/recordings/BIOPAC")
BIDS_PATH = Path("/data/datasets/hcph")
RECALIBRATED_SESSION = 23
FIRST_O2_SESSION = 11 #The cable to record O2 signal has been received midway through the acquisition
MISSING_RB = (29, )
participant = "001"
session = 48
session_excluded = False
biopac_session = biopac_lookup[biopac_lookup.session == session]
if len(biopac_session) != 1:
raise RuntimeError
pe = biopac_session.PE.values[0]
if session_excluded:
print("SESSION IS MARKED EXCLUDED!!!!!")
Prepare repeated metadata across JSON files. These JSON files may be consolidated at the top level of the BIDS structure for the most part.
CARD_JSON = {
"Columns": ["ecg"],
"Manufacturer": "BIOPAC Systems, Inc., Goleta, CA, US",
"ecg": {
"Description": "continuous measurements of Lead I electrocardiogram",
"Units": "mV",
"Model": "ECG100C MRI & MECMRI-2 amplifier",
},
}
RESP_JSON = {
"Columns": ["belt", "CO2", "O2"],
"Manufacturer": "BIOPAC Systems, Inc., Goleta, CA, US",
"belt": {
"Description": "continuous breathing measurement by negative differential pressure with a respiration belt",
"Units": "cm H20",
"Model": "DA100C TSD160A transducer",
},
"CO2": {
"Description": "continuous breathing measurement: absolute CO2 concentration",
"Units": "%",
"Model": "ML206",
"Manufacturer": "AD Instruments Pty. Ltd., Sydney, Australia"
},
"O2": {
"Description": "continuous breathing measurement: absolute O2 concentration",
"Units": "%",
"Model": "ML206",
"Manufacturer": "AD Instruments Pty. Ltd., Sydney, Australia"
}
}
if session < FIRST_O2_SESSION:
RESP_JSON.pop("O2", None)
RESP_JSON["Columns"].remove("O2")
if session in MISSING_RB:
RESP_JSON.pop("belt")
RESP_JSON["Columns"].remove("belt")
We use the bioread
package to load in the AcqKnowledge files.
session_data = read_file(str(DATA_PATH / biopac_session.AcqKnowledge.values[0]))
channels = session_data.channels
Now, let's peek into the data. First, we check the frequency is 5 kHz. Then, we visualize the trigger channel and extract the onset location.
freq = session_data.channels[4].samples_per_second
timeseries = session_data.channels[4].time_index
freq, len(timeseries)
(5000.0, 25916272)
plt.plot(timeseries, session_data.channels[4].data);
data = session_data.channels[4].data
trigger_events = np.clip(np.diff(data, prepend=0), -1, 1)
plt.plot(timeseries, trigger_events);
trigger_onsets = np.clip(trigger_events.copy(), 0, 1)
plt.plot(timeseries, trigger_onsets);
Let's extract the onset of each run. The following cell should show a number 4 as the output. If it is higher, manually delete onsets by index. If it is lower, do not delete onsets and check because this could be a partial file.
trigger_locations = np.argwhere(trigger_onsets > 0)[:, 0]
trigger_spacings = np.diff(trigger_locations, prepend=0)
# 40s is a good guesstimate of the time betweem the last trigger of a sequence and the start of the next
between_seq = trigger_locations[trigger_spacings > freq * 40]
seq_limits = np.zeros_like(trigger_onsets)
seq_limits[between_seq] = 1
plt.plot(timeseries, trigger_onsets);
plt.plot(timeseries, seq_limits);
If the correct triggers are preserved, the plot below should show an orange trigger at the beginning of the blocks we are interested in (DWI, QCT, rest, BHT) and maybe some additional blocks which we will remove manually below.
We are not interested in extracting the physiological recordings during the fieldmap scans. Thus, we manually delete the trigger corresponding to the fieldmap blocks.
if len(between_seq) > 4:
# between_seq = np.delete(between_seq, (0, 1, 2, 4, 6, 7, 8)) # session 16
# between_seq = np.delete(between_seq, (0, 3, 4)) # session 15
# between_seq = np.delete(between_seq, (0, 1, 4, 5, -1)) # session 9
# between_seq = np.delete(between_seq, (2, 3, -1))
# between_seq = np.delete(between_seq, (0, 2, 3, 4, 5, 7, 8, 9)) # session 1
# between_seq = np.delete(between_seq, (2, 3, 4))
seq_limits = np.zeros_like(trigger_onsets)
seq_limits[between_seq] = 1
plt.plot(timeseries, trigger_onsets);
plt.plot(timeseries, seq_limits);
seq_onsets = [timeseries[i] for i in between_seq]
seq_onsets
[846.9438326800024, 3098.1265195436795, 3454.431733292, 4759.619983653728]
Let's now identify the stopping time of each run.
# Use append to "fake" a trigger at the very end of the run
rev_trigger_sp = np.diff(trigger_locations, append=len(trigger_onsets))
rev_between_seq = trigger_locations[rev_trigger_sp > freq * 2]
rev_between_seq = np.delete(rev_between_seq, np.s_[2:-2])
rev_seq_limits = np.zeros_like(trigger_onsets)
rev_seq_limits[rev_between_seq] = 1
plt.plot(timeseries, trigger_onsets);
plt.plot(timeseries, rev_seq_limits);
Converting physio corresponding to the DWI run¶
Cardiac signal (ECG) is stored in channel 2 of our AcqKnowledge files (i.e., index 1 in Python, which is zero-based).
First, let's calculate in the particular ECG channel, the indexes at which we are going to clip the cardiac signal around each imaging run.
For DWI, we will clip the files between dwi_idx_start
and dwi_idx_stop
.
In this case, for DWI we need to take into account that the sequence sends triggers during calibration (2 x 87 slices single slice mode, plus 29 for one multi-slice volume).
With this information, we will be able to calculate dwi_onset
as the timepoint (in seconds) when the DWI sequence started acquiring the first final volume.
dummies = 87 * 2 + 29
first_trigger = trigger_locations[np.argwhere(trigger_locations == between_seq[0])[0, 0] + dummies]
t_0 = timeseries[first_trigger]
first_timepoint = max(int(between_seq[0] - 600 * freq), 0)
t_start = timeseries[first_timepoint]
last_timepoint = between_seq[1] if len(between_seq) > 1 else len(timeseries) - 1
t_stop = timeseries[last_timepoint]
last_trigger = trigger_locations[np.argwhere(trigger_locations == rev_between_seq[0])[0, 0]]
t_run_stop = timeseries[last_trigger] + 7.0
Because DWI and trigger have both the same sampling frequency, this below could be simpler, but let's keep it to apply it later with other signals with different frequency. Basically, we convert trigger indexes to time, and then, using the timeseries of the particular channel, we find the what that trigger index would be, had the trigger timeseries been acquired at this other channel frequency instead.
ch_index = 1
channels = session_data.channels
ch_freq = channels[ch_index].samples_per_second
ch_timeseries = channels[ch_index].time_index
ch_data = channels[ch_index].data
dwi_onset = ch_timeseries[(np.abs(ch_timeseries - t_0)).argmin()]
dwi_idx_start = (np.abs(ch_timeseries - t_start)).argmin()
dwi_idx_stop = (np.abs(ch_timeseries - t_stop)).argmin()
x_sec = ch_timeseries[dwi_idx_start:dwi_idx_stop] - dwi_onset
dwi_ch_data = ch_data[dwi_idx_start:dwi_idx_stop]
dwi_trigger = data[first_timepoint:last_timepoint]
# dwi_idx_start, dwi_idx_stop, dwi_onset, x_sec[0], x_sec[-1], len(x_sec)
Let's now plot the ECG (blue) and trigger (orange) at the beginning of the DWI (at $t=0$, marked by the red tick). It is clear how the ECG signal has been collected for a long while before the DWI, and then, at about 30 sec before the DWI first orientation onset, we see how the triggers corresponding to each slice of calibration kick in. There are 2 volumes $\times$ 87 slices/volume (total 174 onsets) that are very fast at the beginning, followed by 29 triggers at a lower rate (one "dummy" or reference volume, acquired with the same SMS factor 3 of the rest of the DWI).
plt.plot(x_sec, dwi_ch_data)
plt.plot(x_sec, np.clip(dwi_trigger, 0, 0.1))
plt.vlines(0, -0.2, 0.4, colors="r")
plt.xlim((-40, 1))
(-40.0, 1.0)
dwi_cardio_filepath = BIDS_PATH / f"sub-{participant}" / f"ses-{'excl' if session_excluded else ''}{session:03d}" / "dwi" / f"sub-{participant}_ses-{'excl' if session_excluded else ''}{session:03d}_acq-highres_dir-{pe}_recording-cardiac_physio.tsv.gz"
dwi_sidecar = CARD_JSON.copy()
dwi_sidecar.update({
"SamplingFrequency": ch_freq,
"StartTime": x_sec[0],
})
(dwi_cardio_filepath.parent / dwi_cardio_filepath.name.replace(".tsv.gz", ".json")).write_text(dumps(dwi_sidecar, indent=2))
pd.DataFrame({"ecg": dwi_ch_data}).to_csv(
dwi_cardio_filepath,
compression="gzip",
header=False,
sep="\t",
na_rep="n/a",
)
Let's move on to the respiration belt.
dwi_resp_data = {}
ch_freq = channels[0].samples_per_second
ch_timeseries = channels[0].time_index
ch_data = channels[0].data
dwi_onset = ch_timeseries[(np.abs(ch_timeseries - t_0)).argmin()]
dwi_idx_start = (np.abs(ch_timeseries - t_start)).argmin()
dwi_idx_stop = (np.abs(ch_timeseries - t_stop)).argmin()
x_sec = ch_timeseries[dwi_idx_start:dwi_idx_stop] - dwi_onset
dwi_resp_data["belt"] = ch_data[dwi_idx_start:dwi_idx_stop]
assert channels[2].samples_per_second == ch_freq
if session >= FIRST_O2_SESSION:
assert channels[3].samples_per_second == ch_freq
dwi_resp_data["CO2"] = channels[2].data[dwi_idx_start:dwi_idx_stop]
dwi_resp_data["O2"] = channels[3].data[dwi_idx_start:dwi_idx_stop]
if session < RECALIBRATED_SESSION: # Before session 23, calibration was a bit off
dwi_resp_data["CO2"] = dwi_resp_data["CO2"] * (8.0 - 0.045) / 0.8 + 0.045
dwi_resp_data["O2"] = (dwi_resp_data["O2"] - 0.1) * 10.946 / (20.946 + 0.1) + 10
plt.plot(x_sec, dwi_resp_data["belt"])
plt.xlim((-40, 1))
(-40.0, 1.0)
Now, let's have a look at the gas analyzer signal.
plt.plot(x_sec, dwi_resp_data["CO2"])
if session >= FIRST_O2_SESSION:
plt.plot(x_sec, dwi_resp_data["O2"] - 20.946)
plt.xlim((-40, 1))
(-40.0, 1.0)
dwi_resp_filepath = BIDS_PATH / f"sub-{participant}" / f"ses-{'excl' if session_excluded else ''}{session:03d}" / "dwi" / f"sub-{participant}_ses-{'excl' if session_excluded else ''}{session:03d}_acq-highres_dir-{pe}_recording-respiratory_physio.tsv.gz"
dwi_sidecar = RESP_JSON.copy()
dwi_sidecar.update({
"SamplingFrequency": ch_freq,
"StartTime": x_sec[0],
})
(dwi_resp_filepath.parent / dwi_resp_filepath.name.replace(".tsv.gz", ".json")).write_text(dumps(dwi_sidecar, indent=2))
pd.DataFrame(dwi_resp_data).to_csv(
dwi_resp_filepath,
compression="gzip",
columns=dwi_sidecar["Columns"],
header=False,
sep="\t",
na_rep="n/a",
)
Finally, let's write all digital signals in a third file:
dwi_triggers_filepath = BIDS_PATH / f"sub-{participant}" / f"ses-{'excl' if session_excluded else ''}{session:03d}" / "dwi" / f"sub-{participant}_ses-{'excl' if session_excluded else ''}{session:03d}_acq-highres_dir-{pe}_stim.tsv.gz"
channel_idx = list(range(4, len(channels)))
columns = ["trigger"] + [f"digital_ch{line}" for line in range(5, len(channels))]
dwi_trigger_data = {
name: channels[ch_i].data[first_timepoint:last_timepoint]
for name, ch_i in zip(columns, channel_idx)
}
dwi_sidecar = {
"SamplingFrequency": channels[4].samples_per_second,
"StartTime": t_start - t_0,
"Columns": columns,
"Manufacturer": "BIOPAC Systems, Inc., Goleta, CA, US",
}
for col, chid in zip(columns, channel_idx):
dwi_sidecar[col] = {
"Description": f"{''.join(filter(lambda x: x in string.printable, session_data.channel_headers[chid]['szCommentText'].decode().strip()))}: digital pulse signal generated with Psychopy",
"Units": "V",
"Model": "SPT100D",
}
dwi_sidecar["trigger"]["Description"] = "continuous measurement of the scanner trigger signal"
(dwi_triggers_filepath.parent / dwi_triggers_filepath.name.replace(".tsv.gz", ".json")).write_text(dumps(dwi_sidecar, indent=2))
pd.DataFrame(dwi_trigger_data).to_csv(
dwi_triggers_filepath,
compression="gzip",
header=False,
sep="\t",
na_rep="n/a",
)
Converting the physio corresponding to BHT into BIDS¶
dummies = 0
first_trigger = trigger_locations[np.argwhere(trigger_locations == between_seq[-1])[0, 0] + dummies]
t_0 = timeseries[first_trigger]
first_timepoint = int(between_seq[-1] - 120 * freq)
t_start = timeseries[first_timepoint]
last_timepoint = len(timeseries) - 1
t_stop = timeseries[last_timepoint]
last_trigger = trigger_locations[np.argwhere(trigger_locations == rev_between_seq[-1])[0, 0]]
t_run_stop = timeseries[last_trigger] + 1.6
Again as in the DWI section, we find what that trigger index would be, had the trigger timeseries been acquired at this other channel frequency instead.
ch_index = 1
channels = session_data.channels
ch_freq = channels[ch_index].samples_per_second
ch_timeseries = channels[ch_index].time_index
ch_data = channels[ch_index].data
bht_onset = ch_timeseries[(np.abs(ch_timeseries - t_0)).argmin()]
bht_idx_start = (np.abs(ch_timeseries - t_start)).argmin()
bht_idx_stop = (np.abs(ch_timeseries - t_stop)).argmin()
bht_run_idx_stop = (np.abs(ch_timeseries - t_run_stop)).argmin()
x_sec = ch_timeseries[bht_idx_start:bht_idx_stop] - bht_onset
bht_ch_data = ch_data[bht_idx_start:bht_idx_stop]
bht_trigger = data[first_timepoint:last_timepoint]
# bht_idx_start, bht_idx_stop, bht_run_idx_stop, bht_onset, x_sec[0], x_sec[-1], len(x_sec)
And we plot the ECG signal along with the trigger. Note that fMRI sequence sends a trigger every volume, unlike the DWI sequence which sends it every slice, so the triggers are more spaced.
plt.plot(x_sec, bht_ch_data)
plt.plot(x_sec, bht_trigger / 50)
plt.vlines(0, -0.2, 0.4, colors="r")
plt.xlim((-5, 10))
(-5.0, 10.0)
bht_cardio_filepath = BIDS_PATH / f"sub-{participant}" / f"ses-{'excl' if session_excluded else ''}{session:03d}" / "func" / f"sub-{participant}_ses-{'excl' if session_excluded else ''}{session:03d}_task-bht_dir-{pe}_recording-cardiac_physio.tsv.gz"
bht_sidecar = CARD_JSON.copy()
bht_sidecar.update({
"SamplingFrequency": ch_freq,
"StartTime": x_sec[0],
})
(bht_cardio_filepath.parent / bht_cardio_filepath.name.replace(".tsv.gz", ".json")).write_text(dumps(bht_sidecar, indent=2))
pd.DataFrame({"ecg": bht_ch_data}).to_csv(
bht_cardio_filepath,
compression="gzip",
header=False,
sep="\t",
na_rep="n/a",
)
bht_resp_data = {}
ch_index = 0
ch_freq = channels[ch_index].samples_per_second
ch_timeseries = channels[ch_index].time_index
ch_data = channels[ch_index].data
bht_onset = ch_timeseries[(np.abs(ch_timeseries - t_0)).argmin()]
bht_idx_start = (np.abs(ch_timeseries - t_start)).argmin()
bht_idx_stop = (np.abs(ch_timeseries - t_stop)).argmin()
bht_run_idx_stop = (np.abs(ch_timeseries - t_run_stop)).argmin()
x_sec = ch_timeseries[bht_idx_start:bht_idx_stop] - bht_onset
bht_resp_data["belt"] = ch_data[bht_idx_start:bht_idx_stop]
assert channels[2].samples_per_second == ch_freq
if session >= FIRST_O2_SESSION:
assert channels[3].samples_per_second == ch_freq
bht_resp_data["CO2"] = channels[2].data[bht_idx_start:bht_idx_stop]
bht_resp_data["O2"] = channels[3].data[bht_idx_start:bht_idx_stop]
if session < RECALIBRATED_SESSION: # Before session 23, calibration was a bit off
bht_resp_data["CO2"] = bht_resp_data["CO2"] * (8.0 - 0.045) / 0.8 + 0.045
bht_resp_data["O2"] = (bht_resp_data["O2"] - 0.1) * 10.946 / (20.946 + 0.1) + 10
plt.plot(x_sec, bht_resp_data["belt"])
plt.plot(x_sec, bht_resp_data["CO2"])
# if session >= FIRST_O2_SESSION:
# plt.plot(x_sec, bht_resp_data["O2"] - 18)
#plt.xlim((100, 160))
plt.vlines(x_sec[bht_run_idx_stop - bht_idx_start], -4, 4, colors="r")
<matplotlib.collections.LineCollection at 0x7fcd6ae86520>