Does replaying the same movie clip reduce the variability of resting-state fMRI?¶
The interactive version of this notebook is available at https://github.com/TheAxonLab/hcph-sops/tree/mkdocs/docs/analysis.
import json
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
import nibabel as nib
from pathlib import Path
from bids.layout import BIDSLayout, BIDSLayoutIndexer, add_config_paths
from confounds import get_confounds
from load_save import get_atlas_data
#Define filtering parameters
metric = "correlation"
atlas_dimension = "128"
fd_threshold = 0.5
fdthresh_str = str(fd_threshold).replace(".", "")
# Define paths
code_path = Path("/data/code/hcph-sops/")
fc_path = Path(f"/data/derivatives/hcph-fc/")
dataset_path = Path("/data/datasets/hcph-dataset")
suppl_path = Path("/data/derivatives/hcph-suppl/")
confound_path = Path("/data/datasets/hcph-dataset/phenotype/mood_env_quest.tsv")
iqms_path = Path("/data/derivatives/hcph-mriqc/group_bold.tsv")
## Helper function to save figure captions
def save_caption(caption, save_layout, entities):
"""
Save the figure caption to a JSON file.
Parameters:
- caption: The caption text to save.
- save_layout: A BIDSLayout object for saving the caption.
- entities: A dictionary containing BIDS entities for the filename.
"""
# Update the entities to use .json extension
entities['extension'] = '.json'
json_save_path = save_layout.build_path(entities, validate=False)
# Save the caption to a JSON file
caption_data = {"caption": caption}
with open(json_save_path, 'w') as json_file:
json.dump(caption_data, json_file)
Load the functional connectivity matrices¶
# Initialize the BIDS layout, we use a custom indexer because we are using entities that are not yet officially recognized by BIDS
config_path = code_path / "code/bids/indexer.json"
try:
add_config_paths(hcph=config_path)
except ValueError as e:
if "Configuration 'hcph' already exists" in str(e):
print("Configuration 'hcph' already exists, skipping add_config_paths.")
else:
raise e
_indexer = BIDSLayoutIndexer(
config_filename=config_path,
index_metadata=False,
validate=False,
)
layout = BIDSLayout(fc_path, config="hcph", indexer=_indexer, validate=False)
save_layout = BIDSLayout(suppl_path, config="hcph", indexer=_indexer, validate=False)
# Get all the files matching the pattern
entities_base = {
'subject': '001',
'task': 'rsmovie',
'measure': metric,
'scale': atlas_dimension,
'fd_threshold': fdthresh_str,
}
files = layout.get(**entities_base, suffix='connectivity', extension='.tsv', return_type='file')
ses_index = [tsv.split('ses-')[1].split('/')[0] for tsv in files]
assert len(files) == 36, f"Expected 36 FC matrices, got {len(files)}"
# Load the data from each file and concatenate
# Keep only upper-triangle since the FC matrix is symmetric (does the diagonal of our FC contain only ones ? if yes can discard the diagonal, if not have to keep it
fc_matrices = []
for file in files:
fc_matrix = pd.read_csv(file, sep='\t', header=None)
if metric == "sparseinversecovariance":
# Negate the precision matrix to get partial correlation matrix
fc_matrix = -fc_matrix
fc_matrices.append(fc_matrix.values[np.triu_indices_from(fc_matrix, k=0)])
# Store the matrix size for reconstruction later
fc_size = fc_matrix.shape[0]
# Concatenate all the upper triangle values
fc_concat = np.vstack(fc_matrices)
# Load region names
atlas_data = get_atlas_data(dimension=int(atlas_dimension))
atlas_labels = getattr(atlas_data, "labels")
region_labels = atlas_labels["difumo_names"]
assert len(region_labels) == fc_size, f"Expected {fc_size} region labels, got {len(region_labels)}"
#Plot an example FC matrix
plt.figure(figsize=(8,6))
sns.heatmap(fc_matrix, cmap='viridis')
plt.title(f"Example FC matrix")
plt.xlabel('Brain Regions')
plt.ylabel('Brain Regions')
#plt.yticks(ticks=np.arange(len(region_labels))+0.5, labels=region_labels)
# Generate a random matrix of size 2080x36
#fc_concat = np.random.rand(2080, 36)
print(f"Number of brain regions (matrix size): {fc_size}")
print(f"Shape of concatenated upper triangle FC matrices (number of region pairs, number of sessions): {fc_concat.shape}")
[get_dataset_dir] Dataset found in /home/cprovins/nilearn_data/difumo_atlases Number of brain regions (matrix size): 119 Shape of concatenated upper triangle FC matrices (number of region pairs, number of sessions): (36, 7140)
# Add the information that apply too all figures generated below to the base entities
entities_base["experiment"] = "fmrivar"
entities_base["extension"] = ".svg"
entities_base["datatype"] = "figures"
entities_base["suffix"] = "bold"
# Entities specific to this plot
entities = {'visualization': 'similmat'}
entities.update(entities_base)
similarity_matrix = np.corrcoef(fc_concat.T, rowvar=False)
print(f"Shape of the similarity matrix (number of sessions x number of sessions): {similarity_matrix.shape}")
mean_corr = np.mean(similarity_matrix)
print(f"Overall mean correlation: {mean_corr}")
plt.figure(figsize=(10, 8))
sns.heatmap(similarity_matrix, cmap='viridis')
plt.xticks(ticks=np.arange(len(ses_index))+0.5, labels=ses_index, rotation=90)
plt.yticks(ticks=np.arange(len(ses_index))+0.5, labels=ses_index)
plt.xlabel('Session Index', fontsize=14)
plt.ylabel('Session Index', fontsize=14)
save_path = save_layout.build_path(entities, validate=False)
plt.savefig(save_path)
plt.show()
caption = f"**Similarity matrix presenting the Pearson correlation between FC matrices for each session.**The overall mean correlation is {mean_corr:.2f}." \
"The fact that the FC matrices are highly correlated probably explains why only two components are enough to explain the vast majority of the variance." \
"It is also visible that session index 30 is less correlated with the other sessions compared to the other pair-wise comparisons."
save_caption(caption, save_layout, entities)
Shape of the similarity matrix (number of sessions x number of sessions): (36, 36) Overall mean correlation: 0.8581150373039423
Run PCA to estimate the residual variance for each rsfMRI run¶
Choose number of principal components¶
We are using the elbow plot to decide the number of principal components to keep.
from sklearn.decomposition import PCA
# Entities specific to this plot
entities = {'visualization': 'explainedvar'}
entities.update(entities_base)
# Run PCA
# Center the data by removing the mean of each connection
fc_concat = fc_concat - np.mean(fc_concat, axis=0)
# Standardize the data to have unit variance
#fc_concat = fc_concat / np.std(fc_concat, axis=0)
# Run PCA
pca = PCA()
pca.fit(fc_concat)
# Extract the explained variance
explained_variance = pca.explained_variance_ratio_
# Generate the elbow plot
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(range(1, len(explained_variance) + 1), explained_variance, marker='o', linestyle='-')
ax.set_xlabel('Number of Components', fontsize=14)
ax.set_ylabel('Explained Variance Ratio', fontsize=14)
ax.tick_params(axis='x', labelsize=12)
ax.tick_params(axis='y', labelsize=12)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
#Define the number of components to keep
n_comp = 6
# Highlight the points corresponding to number of components we keep in red
ax.plot(n_comp, explained_variance[n_comp-1], marker='o', markersize=8, color='red')
save_path = save_layout.build_path(entities, validate=False)
plt.savefig(save_path)
plt.show()
caption = "**Elbow plot to choose the number of principal components.**The point in red indicates the number of components selected."
save_caption(caption, save_layout, entities)
Plot the principal components¶
# Function to reconstruct the full matrix from the upper triangle
def reconstruct_full_matrix(upper_triangle, size):
full_matrix = np.zeros((size, size))
upper_indices = np.triu_indices(size)
full_matrix[upper_indices] = upper_triangle
full_matrix = full_matrix + full_matrix.T - np.diag(np.diag(full_matrix))
return full_matrix
# Reorder regions by network
network_order = atlas_labels['yeo_networks7'].argsort()
reordered_regions_labels = atlas_labels.iloc[network_order]['difumo_names']
for i, pc in enumerate(pca.components_[:n_comp, :]):
# Reconstruct the full matrix from the upper triangle
full_matrix = reconstruct_full_matrix(pc, fc_size)
# Cluster regions by network
full_matrix = full_matrix[network_order, :][:, network_order]
# Plot the principal component as a heatmap
plt.figure(figsize=(20, 18))
sns.heatmap(full_matrix, cmap='viridis')
plt.title(f'Principal Component {i+1}')
plt.xlabel('Brain Regions')
plt.ylabel('Brain Regions')
# Adjust y-ticks to move them further away from the heatmap
plt.yticks(ticks=np.arange(len(reordered_regions_labels)) + 0.5, labels=reordered_regions_labels, va='center', rotation=0)
plt.gca().tick_params(axis='y', pad=15) # Move y-ticks further away
plt.gca().tick_params(axis='x', pad=15) # Move x-ticks further away
# Add a color bar on the left and bottom ledge indicating the network
network_colors = sns.color_palette("tab20", n_colors=7) # Define 7 colors for the networks
network_segments = atlas_labels.iloc[network_order]['yeo_networks7'].astype('category').cat.codes
# Add color bar on the left with increased thickness
for idx, color in enumerate(network_segments):
plt.gca().add_patch(plt.Rectangle((-1.5, idx), 1.5, 1, color=network_colors[color], transform=plt.gca().transData, clip_on=False))
# Add color bar on the bottom with increased thickness
for idx, color in enumerate(network_segments):
plt.gca().add_patch(plt.Rectangle((idx, full_matrix.shape[0]), 1, 1.5, color=network_colors[color], transform=plt.gca().transData, clip_on=False))
# Add legend for the networks
network_labels = atlas_labels['yeo_networks7'].astype('category').cat.categories
legend_patches = [plt.Line2D([0], [0], color=network_colors[i], lw=4, label=label) for i, label in enumerate(network_labels)]
plt.legend(handles=legend_patches, loc='upper right', bbox_to_anchor=(1.37, 1), title="Networks", fontsize=12, title_fontsize=14)
# Plot all principal components as heatmaps in a single figure with subplots
n_rows = int(np.ceil(n_comp / 3))
n_cols = 3
fig, axes = plt.subplots(n_rows, n_cols, figsize=(n_cols * 5, n_rows * 4))
for i, pc in enumerate(pca.components_[:n_comp, :]):
full_matrix = reconstruct_full_matrix(pc, fc_size)
# Cluster regions by network
full_matrix = full_matrix[network_order, :][:, network_order]
ax = axes[i // n_cols, i % n_cols]
sns.heatmap(full_matrix, cmap='viridis', ax=ax, cbar_kws={'shrink': 0.5})
ax.set_title(f'Principal Component {i+1}')
ax.set_xlabel('Brain Regions', labelpad=20)
ax.set_ylabel('Brain Regions', labelpad=20)
ax.set_xticks([]) # Hide x-ticks
ax.set_yticks([]) # Hide y-ticks
# Add color bar on the left with increased thickness
for idx, color in enumerate(network_segments):
ax.add_patch(plt.Rectangle((-3, idx), 3, 1, color=network_colors[color], transform=ax.transData, clip_on=False))
# Add color bar on the bottom with increased thickness
for idx, color in enumerate(network_segments):
ax.add_patch(plt.Rectangle((idx, full_matrix.shape[0]), 1, 3, color=network_colors[color], transform=ax.transData, clip_on=False))
# Add a single legend for the networks on the right of the figure
fig.legend(
handles=legend_patches,
loc='center right',
bbox_to_anchor=(1.1, 0.5), # Position the legend outside the figure, vertically centered
title="Networks",
fontsize=10,
title_fontsize=12
)
# Hide any unused subplots
for j in range(i + 1, n_rows * n_cols):
axes[j // n_cols, j % n_cols].axis('off')
plt.tight_layout()
plt.show()
print(f"PC 1 has mostly negative weight on region {region_labels[86]}")
print(f"PC 2 has mostly positive weight on regions {region_labels[10]}, {region_labels[79]}, {region_labels[86]} and {region_labels[90]}")
PC 1 has mostly negative weight on region Internal capsule PC 2 has mostly positive weight on regions Cerebellum Crus I posterior, Superior temporal sulcus LH, Internal capsule and Lingual gyrus
Reconstruct the FC with a limited number of components¶
After extracting the principal components, each FC matrix can be expressed as a linear combinations of the principal components plus a residual matrix.
print("Shape of PCA components matrix (n_components, n_features):", pca.components_.shape)
# Project PC components back into each FC subject space
fc_projected = pca.transform(fc_concat)[:, :n_comp]
print("Shape of projected FC onto the first n_comp PCs (n_samples, n_comp):", fc_projected.shape)
fc_reconstructed = np.dot(fc_projected, pca.components_[:n_comp, :]) + np.mean(fc_concat, axis=0)
print("Shape of reconstructed FC matrix (n_samples, n_features):", fc_reconstructed.shape)
Shape of PCA components matrix (n_components, n_features): (36, 7140) Shape of projected FC onto the first n_comp PCs (n_samples, n_comp): (36, 6) Shape of reconstructed FC matrix (n_samples, n_features): (36, 7140)
from sklearn.linear_model import LinearRegression
# Create the figure and subplots
fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(15,10), sharex=True)
axes = axes.flatten() # Flatten the axes array for easy indexing
# Ensure axes is iterable even if n_comp == 1
if n_comp == 1:
axes = [axes]
for i in range(n_comp):
ax = axes[i]
ax.plot(range(fc_projected.shape[0]), fc_projected[:, i], marker='o', label=f'PC{i+1}')
# Fit linear regression
X = np.arange(fc_projected.shape[0]).reshape(-1, 1)
y = fc_projected[:, i]
reg = LinearRegression().fit(X, y)
y_pred = reg.predict(X)
ax.plot(range(fc_projected.shape[0]), y_pred, color='green', linestyle='--', label='Linear Fit')
ax.set_xticks(np.arange(len(ses_index)))
ax.set_xticklabels(ses_index, rotation=90)
ax.set_xlabel('Session Index', fontsize=12)
ax.set_ylabel('PC Weight', fontsize=12)
ax.set_title(f'PC{i+1}', fontsize=14)
ax.legend()
plt.tight_layout()
plt.show()
Compute the residual matrix¶
# Calculate the residuals as the difference between the full and reconstructed FC matrices
residuals = fc_concat - fc_reconstructed
print(f"Residuals matrix shape (n_sessions, n_features): {residuals.shape}")
Residuals matrix shape (n_sessions, n_features): (36, 7140)
import numpy as np
from sklearn.decomposition import PCA
# Project the data onto the residual components (from n_comp + 1 onwards)
residuals_frompc = pca.transform(fc_concat)[:, n_comp:]
# Reconstruct the data using the residual components
residuals_frompc = np.dot(residuals_frompc, pca.components_[n_comp:, :]) + np.mean(fc_concat, axis=0)
# Print the shape of the reconstructed data
print(f"Residuals from PC matrix shape (n_sessions, n_features): {residuals_frompc.shape}")
Residuals from PC matrix shape (n_sessions, n_features): (36, 7140)
Plot residual matrix¶
Sanity check the reconstruction function¶
# Test my reconstruction function
upper_triangle = fc_matrix.values[np.triu_indices_from(fc_matrix, k=0)]
full_matrix = reconstruct_full_matrix(upper_triangle, fc_size)
full_matrix.shape
assert np.array_equal(full_matrix, fc_matrix.values)
residual = reconstruct_full_matrix(residuals[0,:], fc_size)
# Plot the residual matrix as a heatmap
plt.figure(figsize=(10, 8))
sns.heatmap(residual, cmap='viridis')
plt.title('Residual Matrix Heatmap')
plt.show()
# Check whether reconstructing the residuals a different way leads to the same result
residual_frompc = reconstruct_full_matrix(residuals_frompc[0,:], fc_size)
# Plot the residual matrix as a heatmap
plt.figure(figsize=(10, 8))
sns.heatmap(residual_frompc, cmap='viridis')
plt.title('Residual Matrix Heatmap')
plt.show()
Plot the original FC, reconstructed FC and residual matrix for each session¶
# Iterate over each session
for s in range(fc_concat.shape[0]):
# Reconstruct the full matrices for original FC, reconstructed FC, and residuals
original_fc = reconstruct_full_matrix(fc_concat[s, :], fc_size)
reconstructed_fc = reconstruct_full_matrix(fc_reconstructed[s, :], fc_size)
residual_fc = reconstruct_full_matrix(residuals[s, :], fc_size)
# Create a 1x3 subplot
fig, axes = plt.subplots(1, 3, figsize=(18, 6))
# Plot the original FC matrix
sns.heatmap(original_fc, cmap='viridis', ax=axes[0])
axes[0].set_title(f'Session {ses_index[s]}: Original FC', fontsize=14)
# Plot the reconstructed FC matrix
sns.heatmap(reconstructed_fc, cmap='viridis', ax=axes[1])
axes[1].set_title(f'Session {ses_index[s]}: Reconstructed FC', fontsize=14)
# Plot the residual matrix
sns.heatmap(residual_fc, cmap='viridis', ax=axes[2])
axes[2].set_title(f'Session {ses_index[s]}: Residual', fontsize=14)
# Adjust layout and display the plot
plt.tight_layout()
plt.show()
Plot the variance of the residuals over sessions¶
def plot_res_variance(residuals, groupedby=False, min_res_var=0, max_res_var=1000, ses_id=None):
from scipy.optimize import curve_fit
from sklearn.metrics import r2_score
# Compute the variance of the residuals for each session
residuals_var = np.var(residuals, axis=1)
min_res_var = min(min_res_var, min(residuals_var))
max_res_var = max(max_res_var, max(residuals_var))
# Plot the variance of the residuals as a function of the session index
plt.plot(range(len(residuals_var)), residuals_var, marker='o', linestyle='-', label='Variance of Residuals')
# Fit and plot a negative exponential curve
def neg_exp(x, a, b, c):
return a * np.exp(-b * x) + c
if residuals_var.shape[0] > 3:
#popt, _ = curve_fit(neg_exp, range(len(residuals_var)), residuals_var, p0=(1, 0.01, 1))
#neg_exp_r2 = r2_score(residuals_var, neg_exp(range(len(residuals_var)), *popt))
#plt.plot(range(len(residuals_var)), neg_exp(range(len(residuals_var)), *popt), linestyle='--', label=f'Negative Exponential Fit (R$^2$={neg_exp_r2:.2f})')
# Fit and plot a quadratic curve
quadratic_model = np.poly1d(np.polyfit(range(len(residuals_var)), residuals_var, 2))
quadratic_r2 = r2_score(residuals_var, quadratic_model(range(len(residuals_var))))
plt.plot(range(len(residuals_var)), quadratic_model(range(len(residuals_var))), linestyle='--', label=f'Quadratic Fit (R$^2$={quadratic_r2:.2f})')
if residuals_var.shape[0] > 1:
# Fit and plot a linear curve
linear_model = np.poly1d(np.polyfit(range(len(residuals_var)), residuals_var, 1))
linear_r2 = r2_score(residuals_var, linear_model(range(len(residuals_var))))
plt.plot(range(len(residuals_var)), linear_model(range(len(residuals_var))), linestyle='--', label=f'Linear Fit (R$^2$={linear_r2:.2f})')
if ses_id is not None:
plt.xticks(ticks=np.arange(len(ses_id)), labels=ses_id)
plt.tick_params(axis='x', labelsize=12)
plt.tick_params(axis='y', labelsize=12)
plt.legend()
if groupedby:
plt.title(groupedby, fontsize=16)
return min_res_var, max_res_var
# Exclude 30th column
#residuals_wo_outlier = np.delete(residuals, 30, axis=1)
plt.figure(figsize=(10, 6))
# Plot with residuals
#plt.subplot(1, 2, 1)
plot_res_variance(residuals)
plt.xticks(ticks=np.arange(len(ses_index)), labels=ses_index, rotation=90)
plt.xlabel('Session Index', fontsize=16)
plt.ylabel('Variance of Residuals', fontsize=16)
#plt.title('All Sessions', fontsize=16)
#plt.ylim(0.06, 0.24)
"""
# Originally in the abstract I had investigated the effect of excluding the outlier session on the variance of the residuals.
# Plot with residuals_wo_outlier
plt.subplot(1, 2, 2)
plot_res_variance(residuals_wo_outlier)
#plot_res_variance(residuals)
plt.xticks(ticks=np.arange(len(ses_index)-1), labels=np.delete(ses_index, 30), rotation=90)
plt.xlabel('Session Index', fontsize=14)
plt.ylabel('Variance of Residuals', fontsize=14)
plt.title('Excluding the Outlier', fontsize=16)
#plt.ylim(0.06, 0.24)
"""
plt.tight_layout()
plt.show()
Plot edgewise residual over sessions¶
def plot_edgewise_residual(residuals, groupedby=False, ses_id=None):
# Calculate the interquartile range (IQR)
q1 = np.percentile(residuals, 25, axis=1)
q3 = np.percentile(residuals, 75, axis=1)
# Plot the evolution of the residual for each edge
for i in range(residuals.shape[1]):
plt.plot(range(residuals.shape[0]), residuals[:,i], color='blue', alpha=0.1)
# Plot the inter-quartile
plt.plot(range(residuals.shape[0]), q1, color='orange', label='25th percentile')
plt.plot(range(residuals.shape[0]), q3, color='orange', label='75th percentile')
if ses_id is not None:
plt.xticks(ticks=np.arange(len(ses_id)), labels=ses_id)
plt.tick_params(axis='x', labelsize=12)
plt.tick_params(axis='y', labelsize=12)
plt.legend()
if groupedby:
plt.title(groupedby, fontsize=16)
plot_edgewise_residual(residuals)
plt.xlabel('Session Index', fontsize=14)
plt.ylabel('Residual Value', fontsize=14)
plt.show()
Retrieve confounds¶
Retrieve scan time and day of week from the scans.tsv in the dataset repo¶
# Retrieve the confounds from the scans.tsv
confounds_of_interest = [
'caffeine_2h',
'caffeine_24h'
]
scans_df = get_confounds(dataset_path, confound_path, confounds_of_interest, iqms_path=iqms_path)
# Keep only the magnitude part of functional scans and first echo
scans_df = scans_df[scans_df["modality"] == "func"]
# Create dataframe based on the list of files
files_df = pd.DataFrame(files, columns=['file_path'])
files_df = files_df.assign(
subject=files_df["file_path"].str.extract(r"sub-(\d+)/"),
session=files_df["file_path"].str.extract(r"ses-(\w+)/"),
task=files_df["file_path"].str.extract(r"task-(\w+)_"),
)
#Pair filenames with acquisition time
confounds_df = pd.merge(
scans_df, files_df, on=["subject", "session", "task"], how="inner"
)
confounds_df.drop(columns=["file_path"], inplace=True)
print(confounds_df.head())
Warning: Only the IQMs corresponding to the second echo are retained in the IQMs DataFrame.
filename \
0 sub-001/ses-001/func/sub-001_ses-001_task-rsmo...
1 sub-001/ses-003/func/sub-001_ses-003_task-rsmo...
2 sub-001/ses-004/func/sub-001_ses-004_task-rsmo...
3 sub-001/ses-005/func/sub-001_ses-005_task-rsmo...
4 sub-001/ses-006/func/sub-001_ses-006_task-rsmo...
acq_time subject session modality task pe_dir \
0 2023-10-20T19:50:01.187500 001 001 func rsmovie LR
1 2023-10-21T10:12:32.207500 001 003 func rsmovie LR
2 2023-10-21T12:22:08.222500 001 004 func rsmovie RL
3 2023-10-22T10:33:53.380000 001 005 func rsmovie PA
4 2023-10-22T12:18:06.200000 001 006 func rsmovie PA
day_of_week time_of_day fd_mean caffeine_2h caffeine_24h
0 Friday 20:00:00 0.119245 0.0 2.0
1 Saturday 10:00:00 0.096035 1.0 1.0
2 Saturday 12:00:00 0.146882 0.0 1.0
3 Sunday 11:00:00 0.112293 1.0 1.0
4 Sunday 12:00:00 0.088701 1.0 1.0
# Merge residuals with confounds_df to get the time_of_day for each session
residuals_df = pd.DataFrame(residuals)
print("Shape of residuals array (n_samples, n_features):", residuals.shape)
residuals_df = residuals_df.assign(
session = files_df['session'],
)
residuals_df = pd.merge(
residuals_df,
confounds_df[['session', 'day_of_week', 'time_of_day', 'pe_dir', 'caffeine_2h', 'caffeine_24h']],
on='session'
)
print("Shape of residuals_df after merging with confounds_df:", residuals_df.shape)
print("First five rows of residuals_df after merge:")
print(residuals_df.head())
Shape of residuals array (n_samples, n_features): (36, 7140)
Shape of residuals_df after merging with confounds_df: (36, 7146)
First five rows of residuals_df after merge:
0 1 2 3 4 5 6 \
0 -1.731915e-15 0.008568 -0.014353 -0.080859 -0.002130 -0.012745 -0.011353
1 -1.081627e-15 0.001091 0.020877 -0.020616 0.032127 0.067218 0.034650
2 -8.339862e-16 0.016150 -0.056996 -0.079078 -0.110628 -0.012115 0.010908
3 8.614996e-17 0.013174 -0.056856 -0.045357 0.031062 0.009405 -0.018774
4 -8.310795e-16 -0.030164 0.134168 0.060146 0.034667 0.028640 0.026211
7 8 9 ... 7136 7137 7138 7139 session \
0 0.002458 0.055776 0.125133 ... 0.026551 0.0 0.031856 0.0 001
1 0.029521 -0.033804 -0.107729 ... 0.031296 0.0 0.035877 0.0 003
2 0.011550 0.001944 0.015197 ... 0.000338 0.0 -0.024277 0.0 004
3 -0.024694 0.006120 -0.001600 ... -0.009069 0.0 0.026941 0.0 005
4 0.040799 -0.000657 -0.049267 ... -0.041234 0.0 -0.006108 0.0 006
day_of_week time_of_day pe_dir caffeine_2h caffeine_24h
0 Friday 20:00:00 LR 0.0 2.0
1 Saturday 10:00:00 LR 1.0 1.0
2 Saturday 12:00:00 RL 0.0 1.0
3 Sunday 11:00:00 PA 1.0 1.0
4 Sunday 12:00:00 PA 1.0 1.0
[5 rows x 7146 columns]
res = residuals_df.drop(columns=['session', 'day_of_week', 'time_of_day','pe_dir']).values.T
residuals_df = residuals_df.assign(
variance = np.var(res, axis=0)
)
# Cast strings to type compatible with statistical models
residuals_df["session"] = pd.to_numeric(residuals_df["session"])
residuals_df["caffeine_2h"] = pd.to_numeric(residuals_df["caffeine_2h"])
residuals_df["caffeine_24h"] = pd.to_numeric(residuals_df["caffeine_24h"])
residuals_df["day_of_week"] = residuals_df["day_of_week"].astype("category")
residuals_df["time_of_day"] = residuals_df["time_of_day"].astype("category")
residuals_df["pe_dir"] = residuals_df["pe_dir"].astype("category")
print(residuals_df.head())
print("Here are the characteristics of the session with the highest variance:")
max_variance_row = residuals_df.loc[residuals_df['variance'].idxmax()]
print(max_variance_row)
residuals_wo_outlier_df = residuals_df.drop(max_variance_row.name)
0 1 2 3 4 5 6 \
0 -1.731915e-15 0.008568 -0.014353 -0.080859 -0.002130 -0.012745 -0.011353
1 -1.081627e-15 0.001091 0.020877 -0.020616 0.032127 0.067218 0.034650
2 -8.339862e-16 0.016150 -0.056996 -0.079078 -0.110628 -0.012115 0.010908
3 8.614996e-17 0.013174 -0.056856 -0.045357 0.031062 0.009405 -0.018774
4 -8.310795e-16 -0.030164 0.134168 0.060146 0.034667 0.028640 0.026211
7 8 9 ... 7137 7138 7139 session \
0 0.002458 0.055776 0.125133 ... 0.0 0.031856 0.0 1
1 0.029521 -0.033804 -0.107729 ... 0.0 0.035877 0.0 3
2 0.011550 0.001944 0.015197 ... 0.0 -0.024277 0.0 4
3 -0.024694 0.006120 -0.001600 ... 0.0 0.026941 0.0 5
4 0.040799 -0.000657 -0.049267 ... 0.0 -0.006108 0.0 6
day_of_week time_of_day pe_dir caffeine_2h caffeine_24h variance
0 Friday 20:00:00 LR 0.0 2.0 0.002463
1 Saturday 10:00:00 LR 1.0 1.0 0.002472
2 Saturday 12:00:00 RL 0.0 1.0 0.002126
3 Sunday 11:00:00 PA 1.0 1.0 0.001782
4 Sunday 12:00:00 PA 1.0 1.0 0.002011
[5 rows x 7147 columns]
Here are the characteristics of the session with the highest variance:
0 0.0
1 -0.0818
2 0.034087
3 -0.179126
4 -0.102308
...
time_of_day 20:00:00
pe_dir RL
caffeine_2h 0.0
caffeine_24h 2.0
variance 0.004153
Name: 21, Length: 7147, dtype: object
Test habituation significance¶
# Fit the linear mixed effects model
X = sm.add_constant(residuals_df["session"]) # Adds the intercept term
y = residuals_df["variance"]
model = sm.OLS(y, X).fit()
print(model.summary())
OLS Regression Results
==============================================================================
Dep. Variable: variance R-squared: 0.016
Model: OLS Adj. R-squared: -0.013
Method: Least Squares F-statistic: 0.5514
Date: Wed, 08 Apr 2026 Prob (F-statistic): 0.463
Time: 11:40:22 Log-Likelihood: 217.20
No. Observations: 36 AIC: -430.4
Df Residuals: 34 BIC: -427.2
Df Model: 1
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
const 0.0027 0.000 14.080 0.000 0.002 0.003
session 4.997e-06 6.73e-06 0.743 0.463 -8.68e-06 1.87e-05
==============================================================================
Omnibus: 1.571 Durbin-Watson: 1.210
Prob(Omnibus): 0.456 Jarque-Bera (JB): 1.178
Skew: 0.441 Prob(JB): 0.555
Kurtosis: 2.906 Cond. No. 53.5
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
# #Statistics excluding the outlier
# # Fit the linear mixed effects model
# X = sm.add_constant(residuals_wo_outlier_df["session"]) # Adds the intercept term
# y = residuals_wo_outlier_df["variance"]
# model_wo_outlier = sm.OLS(y, X).fit()
# print(model_wo_outlier.summary())
# from statsmodels.stats.multitest import multipletests
# # Assuming you have a list of p-values from multiple tests
# p_values = [model.pvalues["session"], model_wo_outlier.pvalues["session"]]
# # Apply Bonferroni correction
# corrected_p_values = multipletests(p_values, method='bonferroni')[1]
# print(f"Original p-values: {p_values}")
# print(f"Corrected p-values: {corrected_p_values}")
Plot residuals grouped by bins of confound factor¶
Day of week¶
# Entities specific to this plot
entities = {'visualization': 'resvar', 'confound': 'day'}
entities.update(entities_base)
# Create figure with subplots for variance and edgewise residuals
fig, axes = plt.subplots(3, 3, figsize=(20, 20))
min_res_var = 1000
max_res_var = 0
for i, (day_of_week, group) in enumerate(residuals_df.groupby('day_of_week', observed=False)):
plt.sca(axes[i // 3, i % 3])
ses_id = group['session'].apply(lambda x: str(x).zfill(3)).values
res = group.drop(columns=['session', 'day_of_week', 'time_of_day','pe_dir','caffeine_2h', 'caffeine_24h']).values
min_res_var, max_res_var = plot_res_variance(res, groupedby=day_of_week, min_res_var=min_res_var, max_res_var=max_res_var, ses_id=ses_id)
#Set same y-axis limits for all subplots
for ax in axes.flat:
ax.set_ylim(min_res_var-0.0005, max_res_var+0.0005)
# Hide the last two subplots (axes[2, 1] and axes[2, 2])
axes[2, 1].axis('off')
axes[2, 2].axis('off')
fig.text(0.5, 0.08, 'Session Index', ha='center', fontsize=14)
fig.text(0.08, 0.5, 'Variance of Residuals', va='center', rotation='vertical', fontsize=14)
save_path = save_layout.build_path(entities, validate=False)
plt.savefig(save_path)
plt.show()
caption = f"**Residual variance evolution separated by day of week.**" \
"We fitted negative exponential and quadratic curves only if the plot had more than three points."
save_caption(caption, save_layout, entities)
Time of day¶
# Entities specific to this plot
entities = {'visualization': 'resvar', 'confound': 'time'}
entities.update(entities_base)
# Create figure with subplots for variance and edgewise residuals
fig, axes = plt.subplots(3, 4, figsize=(30, 20))
min_res_var = 1000
max_res_var = 0
for i, (time_of_day, group) in enumerate(residuals_df.groupby('time_of_day', observed=False)):
print("Time of day:", time_of_day)
plt.sca(axes[i // 4, i % 4])
ses_id = group['session'].apply(lambda x: str(x).zfill(3)).values
res = group.drop(columns=['session', 'day_of_week', 'time_of_day','pe_dir','caffeine_2h', 'caffeine_24h']).values
print("Shape of residuals:", res.shape)
if res.shape[1] > 1:
min_res_var, max_res_var = plot_res_variance(res, groupedby=time_of_day, min_res_var=min_res_var, max_res_var=max_res_var, ses_id=ses_id)
#Set same y-axis limits for all subplots
for ax in axes.flat:
ax.set_ylim(min_res_var-0.01, max_res_var+0.01)
# Hide the last axis
axes[2, 2].axis('off')
axes[2, 3].axis('off')
fig.text(0.5, 0.08, 'Session Index', ha='center', fontsize=14)
fig.text(0.08, 0.5, 'Variance of Residuals', va='center', rotation='vertical', fontsize=14)
save_path = save_layout.build_path(entities, validate=False)
plt.savefig(save_path)
plt.show()
caption = f"**Residual variance evolution separated by time of day.**"
save_caption(caption, save_layout, entities)
Time of day: 09:00:00 Shape of residuals: (1, 7141) Time of day: 10:00:00 Shape of residuals: (2, 7141) Time of day: 11:00:00 Shape of residuals: (2, 7141) Time of day: 12:00:00 Shape of residuals: (3, 7141) Time of day: 18:00:00 Shape of residuals: (2, 7141) Time of day: 19:00:00 Shape of residuals: (2, 7141) Time of day: 20:00:00 Shape of residuals: (11, 7141) Time of day: 21:00:00 Shape of residuals: (5, 7141) Time of day: 22:00:00 Shape of residuals: (7, 7141) Time of day: 23:00:00 Shape of residuals: (1, 7141)
Phase Encoding Direction¶
# Entities specific to this plot
entities = {'visualization': 'resvar', 'confound': 'pe_dir'}
entities.update(entities_base)
# Create figure with subplots for variance and edgewise residuals
fig, axes = plt.subplots(2, 2, figsize=(12, 12))
min_res_var = 1000
max_res_var = 0
for i, (pe_dir, group) in enumerate(residuals_df.groupby('pe_dir', observed=False)):
plt.sca(axes[i // 2, i % 2])
ses_id = group['session'].apply(lambda x: str(x).zfill(3)).values
res = group.drop(columns=['session', 'day_of_week', 'time_of_day','pe_dir','caffeine_2h', 'caffeine_24h']).values
min_res_var, max_res_var = plot_res_variance(res, groupedby=pe_dir, min_res_var=min_res_var, max_res_var=max_res_var, ses_id=ses_id)
#Set same y-axis limits for all subplots
for ax in axes.flat:
ax.set_ylim(min_res_var-0.0005, max_res_var+0.0005)
fig.text(0.5, 0.05, 'Session Index', ha='center', fontsize=14)
fig.text(0.05, 0.5, 'Variance of Residuals', va='center', rotation='vertical', fontsize=14)
save_path = save_layout.build_path(entities, validate=False)
plt.savefig(save_path)
plt.show()
caption = f"**Residual variance evolution separated by phase encoding direction.**"
save_caption(caption, save_layout, entities)
# Entities specific to this plot
entities = {'visualization': 'edgewisevar', 'confound': 'pe_dir'}
entities.update(entities_base)
# Create figure with subplots for variance and edgewise residuals
fig, axes = plt.subplots(2, 2, figsize=(12, 12))
min_res_var = 1000
max_res_var = 0
for i, (pe_dir, group) in enumerate(residuals_df.groupby('pe_dir', observed=False)):
plt.sca(axes[i // 2, i % 2])
ses_id = group['session'].apply(lambda x: str(x).zfill(3)).values
res = group.drop(columns=['session', 'day_of_week', 'time_of_day','pe_dir','caffeine_2h', 'caffeine_24h']).values
plot_edgewise_residual(res, groupedby=pe_dir, ses_id=ses_id)
fig.text(0.5, 0.05, 'Session Index', ha='center', fontsize=14)
fig.text(0.05, 0.5, 'Variance of Residuals', va='center', rotation='vertical', fontsize=14)
save_path = save_layout.build_path(entities, validate=False)
plt.savefig(save_path)
plt.show()
/tmp/ipykernel_538929/2248055642.py:18: UserWarning: Creating legend with loc="best" can be slow with large amounts of data. plt.savefig(save_path) /home/cprovins/miniconda3/lib/python3.12/site-packages/IPython/core/pylabtools.py:170: UserWarning: Creating legend with loc="best" can be slow with large amounts of data. fig.canvas.print_figure(bytes_io, **kw)
Caffeine consumption 2h before¶
# Entities specific to this plot
entities = {'visualization': 'resvar', 'confound': 'caffeine2h'}
entities.update(entities_base)
# Create figure with subplots for variance and edgewise residuals
fig, axes = plt.subplots(1, 2, figsize=(21, 6))
min_res_var = 1000
max_res_var = 0
for i, (caf, group) in enumerate(residuals_df.groupby('caffeine_2h')):
plt.sca(axes[i])
ses_id = group['session'].apply(lambda x: str(x).zfill(3)).values
res = group.drop(columns=['session', 'day_of_week', 'time_of_day','pe_dir','caffeine_2h', 'caffeine_24h']).values
min_res_var, max_res_var = plot_res_variance(res, groupedby=caf, min_res_var=min_res_var, max_res_var=max_res_var, ses_id=ses_id)
#Set same y-axis limits for all subplots
for ax in axes.flat:
ax.set_ylim(min_res_var-0.0001, max_res_var+0.0001)
# Set x-ticks with 90 degree rotation for the first subplot
axes[0].set_xticklabels(axes[0].get_xticklabels(), rotation=90)
axes[0].set_title("0 cup of coffee")
axes[1].set_title("1 cup of coffee")
fig.text(0.5, 0.05, 'Session Index', ha='center', fontsize=14)
fig.text(0.05, 0.5, 'Variance of Residuals', va='center', rotation='vertical', fontsize=14)
save_path = save_layout.build_path(entities, validate=False)
plt.savefig(save_path)
plt.show()
caption = f"**Residual variance evolution separated by caffeine consumption 2h before.**"
save_caption(caption, save_layout, entities)
# Entities specific to this plot
entities = {'visualization': 'edgewisevar', 'confound': 'caffeine2h'}
entities.update(entities_base)
# Create figure with subplots for variance and edgewise residuals
fig, axes = plt.subplots(1, 2, figsize=(19, 6))
min_res_var = 1000
max_res_var = 0
for i, (caf, group) in enumerate(residuals_df.groupby('caffeine_2h')):
plt.sca(axes[i])
ses_id = group['session'].apply(lambda x: str(x).zfill(3)).values
res = group.drop(columns=['session', 'day_of_week', 'time_of_day','pe_dir','caffeine_2h', 'caffeine_24h']).values
plot_edgewise_residual(res, groupedby=caf, ses_id= ses_id)
# Set x-ticks with 90 degree rotation for the first subplot
axes[0].set_xticklabels(axes[0].get_xticklabels(), rotation=90)
axes[0].set_title("0 cup of coffee")
axes[1].set_title("1 cup of coffee")
fig.text(0.5, 0.05, 'Session Index', ha='center', fontsize=14)
fig.text(0.05, 0.5, 'Variance of Residuals', va='center', rotation='vertical', fontsize=14)
save_path = save_layout.build_path(entities, validate=False)
plt.savefig(save_path)
plt.show()
caption = f"**Edgewise variance evolution separated by caffeine consumption 2h before.**"
save_caption(caption, save_layout, entities)
Caffeine consumption 24h before¶
# Entities specific to this plot
entities = {'visualization': 'resvar', 'confound': 'caffeine24h'}
entities.update(entities_base)
# Create figure with subplots for variance and edgewise residuals
fig, axes = plt.subplots(1, 2, figsize=(16, 7))
min_res_var = 1000
max_res_var = 0
for i, (caf, group) in enumerate(residuals_df.groupby('caffeine_24h')):
plt.sca(axes[i])
ses_id = group['session'].apply(lambda x: str(x).zfill(3)).values
res = group.drop(columns=['session', 'day_of_week', 'time_of_day','pe_dir','caffeine_2h', 'caffeine_24h']).values
min_res_var, max_res_var = plot_res_variance(res, groupedby=caf, min_res_var=min_res_var, max_res_var=max_res_var, ses_id=ses_id)
#Set same y-axis limits for all subplots
for ax in axes.flat:
ax.set_ylim(min_res_var-0.0005, max_res_var+0.0005)
# Set x-ticks with 90 degree rotation for the first subplot
axes[1].set_xticklabels(axes[1].get_xticklabels(), rotation=90)
axes[0].set_title("1 cup of coffee")
axes[1].set_title("2 cups of coffee")
fig.text(0.5, 0.05, 'Session Index', ha='center', fontsize=14)
fig.text(0.05, 0.5, 'Variance of Residuals', va='center', rotation='vertical', fontsize=14)
save_path = save_layout.build_path(entities, validate=False)
plt.savefig(save_path)
plt.show()
caption = f"**Residual variance evolution separated by caffeine consumption 24h before.**"
save_caption(caption, save_layout, entities)
# Entities specific to this plot
entities = {'visualization': 'edgewisevar', 'confound': 'caffeine24h'}
entities.update(entities_base)
# Create figure with subplots for variance and edgewise residuals
fig, axes = plt.subplots(1, 2, figsize=(16, 7))
min_res_var = 1000
max_res_var = 0
for i, (caf, group) in enumerate(residuals_df.groupby('caffeine_24h')):
plt.sca(axes[i])
ses_id = group['session'].apply(lambda x: str(x).zfill(3)).values
res = group.drop(columns=['session', 'day_of_week', 'time_of_day','pe_dir','caffeine_2h', 'caffeine_24h']).values
plot_edgewise_residual(res, groupedby=caf, ses_id=ses_id)
# Set x-ticks with 90 degree rotation for the first subplot
axes[1].set_xticklabels(axes[1].get_xticklabels(), rotation=90)
axes[0].set_title("1 cup of coffee")
axes[1].set_title("2 cups of coffee")
fig.text(0.5, 0.05, 'Session Index', ha='center', fontsize=14)
fig.text(0.05, 0.5, 'Variance of Residuals', va='center', rotation='vertical', fontsize=14)
save_path = save_layout.build_path(entities, validate=False)
plt.savefig(save_path)
plt.show()
caption = f"**Edgewise variance evolution separated by caffeine consumption 24h before.**"
save_caption(caption, save_layout, entities)
Do some confound explain why ses-043 is seemingly less correlated to the other sessions? Can we find a reason that justifies its exclusion from the fMRI analysis?¶
confounds_path = '/data/datasets/hcph-dataset/phenotype/mood_env_quest.tsv'
confounds_df = pd.read_csv(confounds_path, sep='\t')
# Keep only the confounds from the reliability sessions
confounds_df = confounds_df[confounds_df['session_number'].astype(str).str.zfill(3).str.startswith('0')]
confounds_df['session_number'] = confounds_df['session_number'].astype(str).str.zfill(3)
confounds_df.head()
| participant_id | session_number | scanner | pe_dir | date | time | weather_temperature | weather_wind | weather_precipitation | weather_precipitation_type | ... | slept_qct | slept_rest | slept_bht | rumination | anxiety | physical_pain | time_slept_t1w | time_slept_rest | time_slept_diffusion | details_rumination_anxiety_pain | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 37 | 1 | 048 | Prisma | PA | 11-14-2023 | 20:45 | 11-14 | 10.4 | 43.0 | 4 Rain or drizzle | ... | No | No | No | 3.0 | 6.0 | 6.0 | NaN | NaN | NaN | NaN |
| 38 | 1 | 047 | Prisma | RL | 11-14-2023 | 18:50 | 11-14 | 10.4 | 43.0 | 4 Rain or drizzle | ... | No | No | No | 3.0 | 6.0 | 5.0 | NaN | NaN | NaN | NaN |
| 39 | 1 | 046 | Prisma | LR | 11-13-2023 | 20:15 | 10-16 | 8.7 | 7.0 | 2 Scattered precipitation (liquid) | ... | No | No | No | 2.0 | 6.0 | 6.0 | NaN | NaN | NaN | NaN |
| 40 | 1 | 045 | Prisma | LR | 11-13-2023 | 18:30 | 10-16 | 8.7 | 7.0 | 2 Scattered precipitation (liquid) | ... | No | No | No | 3.0 | 6.0 | 6.0 | NaN | NaN | NaN | NaN |
| 41 | 1 | 044 | Prisma | LR | 11-12-2023 | 11:10 | 5-10 | 7.0 | 0.1 | 2 Scattered precipitation (liquid) | ... | No | No | No | 3.0 | 6.0 | 6.0 | NaN | NaN | NaN | NaN |
5 rows × 78 columns
confound_to_plot = confounds_df.columns.drop(['participant_id','session_number', 'scanner', 'pe_dir', 'remove_jewelry', 'mr_room_helium', 'details_rumination_anxiety_pain', 'time_slept_t1w', 'time_slept_rest', 'time_slept_diffusion'])
confound_to_plot
Index(['date', 'time', 'weather_temperature', 'weather_wind',
'weather_precipitation', 'weather_precipitation_type',
'weather_pressure', 'weather_humidity', 'weather_daylight',
'blood_pressure', 'caffeine_24h', 'caffeine_2h', 'alcohol', 'triptans',
'nsaids', 'weight', 'general_health', 'general_stress',
'ease_concentration', 'beh_work', 'beh_freetime', 'beh_sport',
'beh_outdoors', 'beh_electronic_devices', 'beh_active_social',
'beh_passive_social', 'sleep_quality', 'remember_dreams', 'day_dreams',
'time_to_bed', 'time_in_bed', 'steps', 'distance_walked',
'stories_climbed', 'calories', 'interested', 'distressed', 'excited',
'upset', 'strong', 'guilty', 'scared', 'hostile', 'enthusiastic',
'proud', 'irritable', 'alert', 'ashamed', 'inspired', 'nervous',
'determined', 'attentive', 'jittery', 'active', 'afraid',
'filled_all_entries', 'mr_room_temperature', 'mr_room_humidity',
'mr_room_pressure', 'slept_session', 'slept_t1w', 'slept_diffusion',
'slept_qct', 'slept_rest', 'slept_bht', 'rumination', 'anxiety',
'physical_pain'],
dtype='object')
# Convert 'time' column to datetime, round up to the closest full hour
confounds_df['time'] = pd.to_datetime(confounds_df['time'], format='%H:%M', errors='coerce').dt.round('h').dt.strftime('%H').astype(float)
slept_cols = [col for col in confounds_df.columns if col.startswith('slept')]
confounds_df[slept_cols] = confounds_df[slept_cols].replace({'Yes': 1, 'No': 0})
Visualize the distribution of all confounds and highlight as a red dot the value corresponding to ses-043
# Entities specific to this plot
entities = {'visualization': 'confounddist'}
entities.update(entities_base)
# Filter sessions with '043' in session_number (as string, zero-padded if needed)
highlight_mask = confounds_df['session_number'].astype(str).str.zfill(3).str.contains('043')
n_cols = 5
n_plots = len(confound_to_plot)
n_rows = int(np.ceil(n_plots / n_cols))
fig, axes = plt.subplots(n_rows, n_cols, figsize=(n_cols * 4, n_rows * 3+10))
axes = axes.flatten()
for idx, confound in enumerate(confound_to_plot):
ax = axes[idx]
y = confounds_df[confound]
x = np.arange(len(y))
ax.scatter(x, y, alpha=0.7)
# Highlight session 043
ax.scatter(x[highlight_mask], y[highlight_mask], color='red', label='Session 043')
ax.set_title(confound)
ax.grid(True)
# Hide unused subplots
for j in range(idx + 1, len(axes)):
axes[j].set_visible(False)
#plt.tight_layout()
plt.show()
caption = f"**Confounds distribution.** To try to understand whether some confound can explain that Session 043 is seemingly less correlated" \
"to the other sessions, we plotted the distribution of all confounds and highlighted as a red dot the value corresponding to Session 043." \
"Not that it justifies exclusion of ses-043, but it is interesting to see that ses-043 is an outlier in different confound dimensions." \
"Notably, it was one of the only two sessions where the participant reported to be socially active for extended duration, to be outdoor," \
"away from electronic devices, to have more freetime and to have walked more distance and more number of steps than usual." \
"This session is also associated with the only time the sleep quality was recorded at the minimum."
save_caption(caption, save_layout, entities)
Not that it justifies exclusion of ses-043, but it is interesting to see that ses-043 is an outlier in different confound dimensions. Notably, it was one of the only two sessions where the participant reported to be socially active for extended duration, to be outdoor, away from electronic devices, to have more freetime and to have walked more distance and more number of steps than usual. This session is also associated with the only time the sleep quality was recorded at the minimum.