Does replaying the same movie clip reduce the variability of resting-state fMRI?¶

The interactive version of this notebook is available at https://github.com/TheAxonLab/hcph-sops/tree/mkdocs/docs/analysis.

In [3]:

Copied!





import json
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
import nibabel as nib
from pathlib import Path
from bids.layout import BIDSLayout, BIDSLayoutIndexer, add_config_paths
from confounds import get_confounds
from load_save import get_atlas_data

#Define filtering parameters
metric = "correlation"
atlas_dimension = "128"
fd_threshold = 0.5
fdthresh_str = str(fd_threshold).replace(".", "")

# Define paths
code_path = Path("/data/code/hcph-sops/")
fc_path = Path(f"/data/derivatives/hcph-fc/")
dataset_path = Path("/data/datasets/hcph-dataset")
suppl_path = Path("/data/derivatives/hcph-suppl/")
confound_path = Path("/data/datasets/hcph-dataset/phenotype/mood_env_quest.tsv")
iqms_path = Path("/data/derivatives/hcph-mriqc/group_bold.tsv")
import json
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
import nibabel as nib
from pathlib import Path
from bids.layout import BIDSLayout, BIDSLayoutIndexer, add_config_paths
from confounds import get_confounds
from load_save import get_atlas_data

#Define filtering parameters
metric = "correlation"
atlas_dimension = "128"
fd_threshold = 0.5
fdthresh_str = str(fd_threshold).replace(".", "")

# Define paths
code_path = Path("/data/code/hcph-sops/")
fc_path = Path(f"/data/derivatives/hcph-fc/")
dataset_path = Path("/data/datasets/hcph-dataset")
suppl_path = Path("/data/derivatives/hcph-suppl/")
confound_path = Path("/data/datasets/hcph-dataset/phenotype/mood_env_quest.tsv")
iqms_path = Path("/data/derivatives/hcph-mriqc/group_bold.tsv")

In [4]:

Copied!





## Helper function to save figure captions
def save_caption(caption, save_layout, entities):
    """
    Save the figure caption to a JSON file.
    
    Parameters:
    - caption: The caption text to save.
    - save_layout: A BIDSLayout object for saving the caption.
    - entities: A dictionary containing BIDS entities for the filename.
    """
    # Update the entities to use .json extension
    entities['extension'] = '.json'
    json_save_path = save_layout.build_path(entities, validate=False)

    # Save the caption to a JSON file
    caption_data = {"caption": caption}
    with open(json_save_path, 'w') as json_file:
        json.dump(caption_data, json_file)
## Helper function to save figure captions
def save_caption(caption, save_layout, entities):
    """
    Save the figure caption to a JSON file.
    
    Parameters:
    - caption: The caption text to save.
    - save_layout: A BIDSLayout object for saving the caption.
    - entities: A dictionary containing BIDS entities for the filename.
    """
    # Update the entities to use .json extension
    entities['extension'] = '.json'
    json_save_path = save_layout.build_path(entities, validate=False)

    # Save the caption to a JSON file
    caption_data = {"caption": caption}
    with open(json_save_path, 'w') as json_file:
        json.dump(caption_data, json_file)

Load the functional connectivity matrices¶

In [5]:

Copied!





# Initialize the BIDS layout, we use a custom indexer because we are using entities that are not yet officially recognized by BIDS 
config_path = code_path / "code/bids/indexer.json"
try:
    add_config_paths(hcph=config_path)
except ValueError as e:
    if "Configuration 'hcph' already exists" in str(e):
        print("Configuration 'hcph' already exists, skipping add_config_paths.")
    else:
        raise e
_indexer = BIDSLayoutIndexer(
    config_filename=config_path,
    index_metadata=False,
    validate=False,
)       
layout = BIDSLayout(fc_path, config="hcph", indexer=_indexer, validate=False)
save_layout = BIDSLayout(suppl_path, config="hcph", indexer=_indexer, validate=False)

# Get all the files matching the pattern
entities_base = {
    'subject': '001',
    'task': 'rsmovie',
    'measure': metric,
    'scale': atlas_dimension,
    'fd_threshold': fdthresh_str,
}
files = layout.get(**entities_base, suffix='connectivity', extension='.tsv', return_type='file')
ses_index = [tsv.split('ses-')[1].split('/')[0] for tsv in files]
assert len(files) == 36, f"Expected 36 FC matrices, got {len(files)}"

# Load the data from each file and concatenate
# Keep only upper-triangle since the FC matrix is symmetric (does the diagonal of our FC contain only ones ? if yes can discard the diagonal, if not have to keep it 
fc_matrices = []
for file in files:
    fc_matrix = pd.read_csv(file, sep='\t', header=None)
    if metric == "sparseinversecovariance":
        # Negate the precision matrix to get partial correlation matrix
        fc_matrix = -fc_matrix
    
    fc_matrices.append(fc_matrix.values[np.triu_indices_from(fc_matrix, k=0)])

# Store the matrix size for reconstruction later
fc_size = fc_matrix.shape[0]
# Concatenate all the upper triangle values
fc_concat = np.vstack(fc_matrices)

# Load region names
atlas_data = get_atlas_data(dimension=int(atlas_dimension))
atlas_labels = getattr(atlas_data, "labels")
region_labels = atlas_labels["difumo_names"]
assert len(region_labels) == fc_size, f"Expected {fc_size} region labels, got {len(region_labels)}"

#Plot an example FC matrix
plt.figure(figsize=(8,6))
sns.heatmap(fc_matrix, cmap='viridis')
plt.title(f"Example FC matrix")
plt.xlabel('Brain Regions')
plt.ylabel('Brain Regions')
#plt.yticks(ticks=np.arange(len(region_labels))+0.5, labels=region_labels)

# Generate a random matrix of size 2080x36
#fc_concat = np.random.rand(2080, 36)
print(f"Number of brain regions (matrix size): {fc_size}")
print(f"Shape of concatenated upper triangle FC matrices (number of region pairs, number of sessions): {fc_concat.shape}")
# Initialize the BIDS layout, we use a custom indexer because we are using entities that are not yet officially recognized by BIDS 
config_path = code_path / "code/bids/indexer.json"
try:
    add_config_paths(hcph=config_path)
except ValueError as e:
    if "Configuration 'hcph' already exists" in str(e):
        print("Configuration 'hcph' already exists, skipping add_config_paths.")
    else:
        raise e
_indexer = BIDSLayoutIndexer(
    config_filename=config_path,
    index_metadata=False,
    validate=False,
)       
layout = BIDSLayout(fc_path, config="hcph", indexer=_indexer, validate=False)
save_layout = BIDSLayout(suppl_path, config="hcph", indexer=_indexer, validate=False)

# Get all the files matching the pattern
entities_base = {
    'subject': '001',
    'task': 'rsmovie',
    'measure': metric,
    'scale': atlas_dimension,
    'fd_threshold': fdthresh_str,
}
files = layout.get(**entities_base, suffix='connectivity', extension='.tsv', return_type='file')
ses_index = [tsv.split('ses-')[1].split('/')[0] for tsv in files]
assert len(files) == 36, f"Expected 36 FC matrices, got {len(files)}"

# Load the data from each file and concatenate
# Keep only upper-triangle since the FC matrix is symmetric (does the diagonal of our FC contain only ones ? if yes can discard the diagonal, if not have to keep it 
fc_matrices = []
for file in files:
    fc_matrix = pd.read_csv(file, sep='\t', header=None)
    if metric == "sparseinversecovariance":
        # Negate the precision matrix to get partial correlation matrix
        fc_matrix = -fc_matrix
    
    fc_matrices.append(fc_matrix.values[np.triu_indices_from(fc_matrix, k=0)])

# Store the matrix size for reconstruction later
fc_size = fc_matrix.shape[0]
# Concatenate all the upper triangle values
fc_concat = np.vstack(fc_matrices)

# Load region names
atlas_data = get_atlas_data(dimension=int(atlas_dimension))
atlas_labels = getattr(atlas_data, "labels")
region_labels = atlas_labels["difumo_names"]
assert len(region_labels) == fc_size, f"Expected {fc_size} region labels, got {len(region_labels)}"

#Plot an example FC matrix
plt.figure(figsize=(8,6))
sns.heatmap(fc_matrix, cmap='viridis')
plt.title(f"Example FC matrix")
plt.xlabel('Brain Regions')
plt.ylabel('Brain Regions')
#plt.yticks(ticks=np.arange(len(region_labels))+0.5, labels=region_labels)

# Generate a random matrix of size 2080x36
#fc_concat = np.random.rand(2080, 36)
print(f"Number of brain regions (matrix size): {fc_size}")
print(f"Shape of concatenated upper triangle FC matrices (number of region pairs, number of sessions): {fc_concat.shape}")

[get_dataset_dir] Dataset found in /home/cprovins/nilearn_data/difumo_atlases
Number of brain regions (matrix size): 119
Shape of concatenated upper triangle FC matrices (number of region pairs, number of sessions): (36, 7140)

No description has been provided for this image

In [6]:

Copied!





# Add the information that apply too all figures generated below to the base entities
entities_base["experiment"] = "fmrivar"
entities_base["extension"] = ".svg"
entities_base["datatype"] = "figures"
entities_base["suffix"] = "bold"
# Add the information that apply too all figures generated below to the base entities
entities_base["experiment"] = "fmrivar"
entities_base["extension"] = ".svg"
entities_base["datatype"] = "figures"
entities_base["suffix"] = "bold"

In [7]:

Copied!





# Entities specific to this plot
entities = {'visualization': 'similmat'}
entities.update(entities_base)

similarity_matrix = np.corrcoef(fc_concat.T, rowvar=False)
print(f"Shape of the similarity matrix (number of sessions x number of sessions): {similarity_matrix.shape}")

mean_corr = np.mean(similarity_matrix)
print(f"Overall mean correlation: {mean_corr}")

plt.figure(figsize=(10, 8))
sns.heatmap(similarity_matrix, cmap='viridis')
plt.xticks(ticks=np.arange(len(ses_index))+0.5, labels=ses_index, rotation=90)
plt.yticks(ticks=np.arange(len(ses_index))+0.5, labels=ses_index)
plt.xlabel('Session Index', fontsize=14)
plt.ylabel('Session Index', fontsize=14)
save_path = save_layout.build_path(entities, validate=False)
plt.savefig(save_path)
plt.show()

caption = f"**Similarity matrix presenting the Pearson correlation between FC matrices for each session.**The overall mean correlation is {mean_corr:.2f}." \
"The fact that the FC matrices are highly correlated probably explains why only two components are enough to explain the vast majority of the variance." \
"It is also visible that session index 30 is less correlated with the other sessions compared to the other pair-wise comparisons."
save_caption(caption, save_layout, entities)
# Entities specific to this plot
entities = {'visualization': 'similmat'}
entities.update(entities_base)

similarity_matrix = np.corrcoef(fc_concat.T, rowvar=False)
print(f"Shape of the similarity matrix (number of sessions x number of sessions): {similarity_matrix.shape}")

mean_corr = np.mean(similarity_matrix)
print(f"Overall mean correlation: {mean_corr}")

plt.figure(figsize=(10, 8))
sns.heatmap(similarity_matrix, cmap='viridis')
plt.xticks(ticks=np.arange(len(ses_index))+0.5, labels=ses_index, rotation=90)
plt.yticks(ticks=np.arange(len(ses_index))+0.5, labels=ses_index)
plt.xlabel('Session Index', fontsize=14)
plt.ylabel('Session Index', fontsize=14)
save_path = save_layout.build_path(entities, validate=False)
plt.savefig(save_path)
plt.show()

caption = f"**Similarity matrix presenting the Pearson correlation between FC matrices for each session.**The overall mean correlation is {mean_corr:.2f}." \
"The fact that the FC matrices are highly correlated probably explains why only two components are enough to explain the vast majority of the variance." \
"It is also visible that session index 30 is less correlated with the other sessions compared to the other pair-wise comparisons."
save_caption(caption, save_layout, entities)

Shape of the similarity matrix (number of sessions x number of sessions): (36, 36)
Overall mean correlation: 0.8581150373039423

Run PCA to estimate the residual variance for each rsfMRI run¶

Choose number of principal components¶

We are using the elbow plot to decide the number of principal components to keep.

In [8]:

Copied!





from sklearn.decomposition import PCA
# Entities specific to this plot
entities = {'visualization': 'explainedvar'}
entities.update(entities_base)

# Run PCA
# Center the data by removing the mean of each connection
fc_concat = fc_concat - np.mean(fc_concat, axis=0)

# Standardize the data to have unit variance
#fc_concat = fc_concat / np.std(fc_concat, axis=0)

# Run PCA
pca = PCA()
pca.fit(fc_concat)

# Extract the explained variance
explained_variance = pca.explained_variance_ratio_

# Generate the elbow plot
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(range(1, len(explained_variance) + 1), explained_variance, marker='o', linestyle='-')
ax.set_xlabel('Number of Components', fontsize=14)
ax.set_ylabel('Explained Variance Ratio', fontsize=14)
ax.tick_params(axis='x', labelsize=12)
ax.tick_params(axis='y', labelsize=12)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

#Define the number of components to keep
n_comp = 6

# Highlight the points corresponding to number of components we keep in red
ax.plot(n_comp, explained_variance[n_comp-1], marker='o', markersize=8, color='red')
save_path = save_layout.build_path(entities, validate=False)
plt.savefig(save_path)
plt.show()

caption = "**Elbow plot to choose the number of principal components.**The point in red indicates the number of components selected."
save_caption(caption, save_layout, entities)
from sklearn.decomposition import PCA
# Entities specific to this plot
entities = {'visualization': 'explainedvar'}
entities.update(entities_base)

# Run PCA
# Center the data by removing the mean of each connection
fc_concat = fc_concat - np.mean(fc_concat, axis=0)

# Standardize the data to have unit variance
#fc_concat = fc_concat / np.std(fc_concat, axis=0)

# Run PCA
pca = PCA()
pca.fit(fc_concat)

# Extract the explained variance
explained_variance = pca.explained_variance_ratio_

# Generate the elbow plot
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(range(1, len(explained_variance) + 1), explained_variance, marker='o', linestyle='-')
ax.set_xlabel('Number of Components', fontsize=14)
ax.set_ylabel('Explained Variance Ratio', fontsize=14)
ax.tick_params(axis='x', labelsize=12)
ax.tick_params(axis='y', labelsize=12)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

#Define the number of components to keep
n_comp = 6

# Highlight the points corresponding to number of components we keep in red
ax.plot(n_comp, explained_variance[n_comp-1], marker='o', markersize=8, color='red')
save_path = save_layout.build_path(entities, validate=False)
plt.savefig(save_path)
plt.show()

caption = "**Elbow plot to choose the number of principal components.**The point in red indicates the number of components selected."
save_caption(caption, save_layout, entities)

Plot the principal components¶

In [9]:

Copied!





# Function to reconstruct the full matrix from the upper triangle
def reconstruct_full_matrix(upper_triangle, size):
    full_matrix = np.zeros((size, size))
    upper_indices = np.triu_indices(size)
    full_matrix[upper_indices] = upper_triangle
    full_matrix = full_matrix + full_matrix.T - np.diag(np.diag(full_matrix))
    return full_matrix

# Reorder regions by network
network_order = atlas_labels['yeo_networks7'].argsort()
reordered_regions_labels = atlas_labels.iloc[network_order]['difumo_names']
for i, pc in enumerate(pca.components_[:n_comp, :]):
    # Reconstruct the full matrix from the upper triangle
    full_matrix = reconstruct_full_matrix(pc, fc_size)
    # Cluster regions by network
    full_matrix = full_matrix[network_order, :][:, network_order]
    
    # Plot the principal component as a heatmap
    plt.figure(figsize=(20, 18))
    sns.heatmap(full_matrix, cmap='viridis')
    plt.title(f'Principal Component {i+1}')
    plt.xlabel('Brain Regions')
    plt.ylabel('Brain Regions')
    # Adjust y-ticks to move them further away from the heatmap
    plt.yticks(ticks=np.arange(len(reordered_regions_labels)) + 0.5, labels=reordered_regions_labels, va='center', rotation=0)
    plt.gca().tick_params(axis='y', pad=15)  # Move y-ticks further away
    plt.gca().tick_params(axis='x', pad=15)  # Move x-ticks further away

    # Add a color bar on the left and bottom ledge indicating the network
    network_colors = sns.color_palette("tab20", n_colors=7)  # Define 7 colors for the networks
    network_segments = atlas_labels.iloc[network_order]['yeo_networks7'].astype('category').cat.codes

    # Add color bar on the left with increased thickness
    for idx, color in enumerate(network_segments):
        plt.gca().add_patch(plt.Rectangle((-1.5, idx), 1.5, 1, color=network_colors[color], transform=plt.gca().transData, clip_on=False))

    # Add color bar on the bottom with increased thickness
    for idx, color in enumerate(network_segments):
        plt.gca().add_patch(plt.Rectangle((idx, full_matrix.shape[0]), 1, 1.5, color=network_colors[color], transform=plt.gca().transData, clip_on=False))

    # Add legend for the networks
    network_labels = atlas_labels['yeo_networks7'].astype('category').cat.categories
    legend_patches = [plt.Line2D([0], [0], color=network_colors[i], lw=4, label=label) for i, label in enumerate(network_labels)]
    plt.legend(handles=legend_patches, loc='upper right', bbox_to_anchor=(1.37, 1), title="Networks", fontsize=12, title_fontsize=14)

# Plot all principal components as heatmaps in a single figure with subplots
n_rows = int(np.ceil(n_comp / 3))
n_cols = 3
fig, axes = plt.subplots(n_rows, n_cols, figsize=(n_cols * 5, n_rows * 4))

for i, pc in enumerate(pca.components_[:n_comp, :]):
    full_matrix = reconstruct_full_matrix(pc, fc_size)
    # Cluster regions by network
    full_matrix = full_matrix[network_order, :][:, network_order]
    ax = axes[i // n_cols, i % n_cols]
    sns.heatmap(full_matrix, cmap='viridis', ax=ax, cbar_kws={'shrink': 0.5})
    ax.set_title(f'Principal Component {i+1}')
    ax.set_xlabel('Brain Regions', labelpad=20)
    ax.set_ylabel('Brain Regions', labelpad=20)
    ax.set_xticks([])  # Hide x-ticks
    ax.set_yticks([])  # Hide y-ticks

    # Add color bar on the left with increased thickness
    for idx, color in enumerate(network_segments):
        ax.add_patch(plt.Rectangle((-3, idx), 3, 1, color=network_colors[color], transform=ax.transData, clip_on=False))

    # Add color bar on the bottom with increased thickness
    for idx, color in enumerate(network_segments):
        ax.add_patch(plt.Rectangle((idx, full_matrix.shape[0]), 1, 3, color=network_colors[color], transform=ax.transData, clip_on=False))

# Add a single legend for the networks on the right of the figure
fig.legend(
    handles=legend_patches,
    loc='center right',
    bbox_to_anchor=(1.1, 0.5),  # Position the legend outside the figure, vertically centered
    title="Networks",
    fontsize=10,
    title_fontsize=12
)

# Hide any unused subplots
for j in range(i + 1, n_rows * n_cols):
    axes[j // n_cols, j % n_cols].axis('off')

plt.tight_layout()
plt.show()
# Function to reconstruct the full matrix from the upper triangle
def reconstruct_full_matrix(upper_triangle, size):
    full_matrix = np.zeros((size, size))
    upper_indices = np.triu_indices(size)
    full_matrix[upper_indices] = upper_triangle
    full_matrix = full_matrix + full_matrix.T - np.diag(np.diag(full_matrix))
    return full_matrix

# Reorder regions by network
network_order = atlas_labels['yeo_networks7'].argsort()
reordered_regions_labels = atlas_labels.iloc[network_order]['difumo_names']
for i, pc in enumerate(pca.components_[:n_comp, :]):
    # Reconstruct the full matrix from the upper triangle
    full_matrix = reconstruct_full_matrix(pc, fc_size)
    # Cluster regions by network
    full_matrix = full_matrix[network_order, :][:, network_order]
    
    # Plot the principal component as a heatmap
    plt.figure(figsize=(20, 18))
    sns.heatmap(full_matrix, cmap='viridis')
    plt.title(f'Principal Component {i+1}')
    plt.xlabel('Brain Regions')
    plt.ylabel('Brain Regions')
    # Adjust y-ticks to move them further away from the heatmap
    plt.yticks(ticks=np.arange(len(reordered_regions_labels)) + 0.5, labels=reordered_regions_labels, va='center', rotation=0)
    plt.gca().tick_params(axis='y', pad=15)  # Move y-ticks further away
    plt.gca().tick_params(axis='x', pad=15)  # Move x-ticks further away

    # Add a color bar on the left and bottom ledge indicating the network
    network_colors = sns.color_palette("tab20", n_colors=7)  # Define 7 colors for the networks
    network_segments = atlas_labels.iloc[network_order]['yeo_networks7'].astype('category').cat.codes

    # Add color bar on the left with increased thickness
    for idx, color in enumerate(network_segments):
        plt.gca().add_patch(plt.Rectangle((-1.5, idx), 1.5, 1, color=network_colors[color], transform=plt.gca().transData, clip_on=False))

    # Add color bar on the bottom with increased thickness
    for idx, color in enumerate(network_segments):
        plt.gca().add_patch(plt.Rectangle((idx, full_matrix.shape[0]), 1, 1.5, color=network_colors[color], transform=plt.gca().transData, clip_on=False))

    # Add legend for the networks
    network_labels = atlas_labels['yeo_networks7'].astype('category').cat.categories
    legend_patches = [plt.Line2D([0], [0], color=network_colors[i], lw=4, label=label) for i, label in enumerate(network_labels)]
    plt.legend(handles=legend_patches, loc='upper right', bbox_to_anchor=(1.37, 1), title="Networks", fontsize=12, title_fontsize=14)

# Plot all principal components as heatmaps in a single figure with subplots
n_rows = int(np.ceil(n_comp / 3))
n_cols = 3
fig, axes = plt.subplots(n_rows, n_cols, figsize=(n_cols * 5, n_rows * 4))

for i, pc in enumerate(pca.components_[:n_comp, :]):
    full_matrix = reconstruct_full_matrix(pc, fc_size)
    # Cluster regions by network
    full_matrix = full_matrix[network_order, :][:, network_order]
    ax = axes[i // n_cols, i % n_cols]
    sns.heatmap(full_matrix, cmap='viridis', ax=ax, cbar_kws={'shrink': 0.5})
    ax.set_title(f'Principal Component {i+1}')
    ax.set_xlabel('Brain Regions', labelpad=20)
    ax.set_ylabel('Brain Regions', labelpad=20)
    ax.set_xticks([])  # Hide x-ticks
    ax.set_yticks([])  # Hide y-ticks

    # Add color bar on the left with increased thickness
    for idx, color in enumerate(network_segments):
        ax.add_patch(plt.Rectangle((-3, idx), 3, 1, color=network_colors[color], transform=ax.transData, clip_on=False))

    # Add color bar on the bottom with increased thickness
    for idx, color in enumerate(network_segments):
        ax.add_patch(plt.Rectangle((idx, full_matrix.shape[0]), 1, 3, color=network_colors[color], transform=ax.transData, clip_on=False))

# Add a single legend for the networks on the right of the figure
fig.legend(
    handles=legend_patches,
    loc='center right',
    bbox_to_anchor=(1.1, 0.5),  # Position the legend outside the figure, vertically centered
    title="Networks",
    fontsize=10,
    title_fontsize=12
)

# Hide any unused subplots
for j in range(i + 1, n_rows * n_cols):
    axes[j // n_cols, j % n_cols].axis('off')

plt.tight_layout()
plt.show()

In [10]:

Copied!

print(f"PC 1 has mostly negative weight on region {region_labels[86]}")
print(f"PC 2 has mostly positive weight on regions {region_labels[10]}, {region_labels[79]}, {region_labels[86]} and {region_labels[90]}")
print(f"PC 1 has mostly negative weight on region {region_labels[86]}")
print(f"PC 2 has mostly positive weight on regions {region_labels[10]}, {region_labels[79]}, {region_labels[86]} and {region_labels[90]}")

PC 1 has mostly negative weight on region Internal capsule
PC 2 has mostly positive weight on regions Cerebellum Crus I posterior, Superior temporal sulcus LH, Internal capsule and Lingual gyrus

Reconstruct the FC with a limited number of components¶

After extracting the principal components, each FC matrix can be expressed as a linear combinations of the principal components plus a residual matrix.

In [11]:

Copied!





print("Shape of PCA components matrix (n_components, n_features):", pca.components_.shape)
# Project PC components back into each FC subject space
fc_projected = pca.transform(fc_concat)[:, :n_comp]
print("Shape of projected FC onto the first n_comp PCs (n_samples, n_comp):", fc_projected.shape)
fc_reconstructed = np.dot(fc_projected, pca.components_[:n_comp, :]) + np.mean(fc_concat, axis=0)
print("Shape of reconstructed FC matrix (n_samples, n_features):", fc_reconstructed.shape)
print("Shape of PCA components matrix (n_components, n_features):", pca.components_.shape)
# Project PC components back into each FC subject space
fc_projected = pca.transform(fc_concat)[:, :n_comp]
print("Shape of projected FC onto the first n_comp PCs (n_samples, n_comp):", fc_projected.shape)
fc_reconstructed = np.dot(fc_projected, pca.components_[:n_comp, :]) + np.mean(fc_concat, axis=0)
print("Shape of reconstructed FC matrix (n_samples, n_features):", fc_reconstructed.shape)

Shape of PCA components matrix (n_components, n_features): (36, 7140)
Shape of projected FC onto the first n_comp PCs (n_samples, n_comp): (36, 6)
Shape of reconstructed FC matrix (n_samples, n_features): (36, 7140)

In [12]:

Copied!





from sklearn.linear_model import LinearRegression
# Create the figure and subplots
fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(15,10), sharex=True)
axes = axes.flatten()  # Flatten the axes array for easy indexing

# Ensure axes is iterable even if n_comp == 1
if n_comp == 1:
    axes = [axes]

for i in range(n_comp):
    ax = axes[i]
    ax.plot(range(fc_projected.shape[0]), fc_projected[:, i], marker='o', label=f'PC{i+1}')

    # Fit linear regression
    X = np.arange(fc_projected.shape[0]).reshape(-1, 1)
    y = fc_projected[:, i]
    reg = LinearRegression().fit(X, y)
    y_pred = reg.predict(X)
    ax.plot(range(fc_projected.shape[0]), y_pred, color='green', linestyle='--', label='Linear Fit')

    ax.set_xticks(np.arange(len(ses_index)))
    ax.set_xticklabels(ses_index, rotation=90)
    ax.set_xlabel('Session Index', fontsize=12)
    ax.set_ylabel('PC Weight', fontsize=12)
    ax.set_title(f'PC{i+1}', fontsize=14)
    ax.legend()

plt.tight_layout()
plt.show()
from sklearn.linear_model import LinearRegression
# Create the figure and subplots
fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(15,10), sharex=True)
axes = axes.flatten()  # Flatten the axes array for easy indexing

# Ensure axes is iterable even if n_comp == 1
if n_comp == 1:
    axes = [axes]

for i in range(n_comp):
    ax = axes[i]
    ax.plot(range(fc_projected.shape[0]), fc_projected[:, i], marker='o', label=f'PC{i+1}')

    # Fit linear regression
    X = np.arange(fc_projected.shape[0]).reshape(-1, 1)
    y = fc_projected[:, i]
    reg = LinearRegression().fit(X, y)
    y_pred = reg.predict(X)
    ax.plot(range(fc_projected.shape[0]), y_pred, color='green', linestyle='--', label='Linear Fit')

    ax.set_xticks(np.arange(len(ses_index)))
    ax.set_xticklabels(ses_index, rotation=90)
    ax.set_xlabel('Session Index', fontsize=12)
    ax.set_ylabel('PC Weight', fontsize=12)
    ax.set_title(f'PC{i+1}', fontsize=14)
    ax.legend()

plt.tight_layout()
plt.show()

Compute the residual matrix¶

In [13]:

Copied!

# Calculate the residuals as the difference between the full and reconstructed FC matrices
residuals = fc_concat - fc_reconstructed
print(f"Residuals matrix shape (n_sessions, n_features): {residuals.shape}")
# Calculate the residuals as the difference between the full and reconstructed FC matrices
residuals = fc_concat - fc_reconstructed
print(f"Residuals matrix shape (n_sessions, n_features): {residuals.shape}")

Residuals matrix shape (n_sessions, n_features): (36, 7140)

In [14]:

Copied!





import numpy as np
from sklearn.decomposition import PCA

# Project the data onto the residual components (from n_comp + 1 onwards)
residuals_frompc = pca.transform(fc_concat)[:, n_comp:]

# Reconstruct the data using the residual components
residuals_frompc = np.dot(residuals_frompc, pca.components_[n_comp:, :]) + np.mean(fc_concat, axis=0)

# Print the shape of the reconstructed data
print(f"Residuals from PC matrix shape (n_sessions, n_features): {residuals_frompc.shape}")
import numpy as np
from sklearn.decomposition import PCA

# Project the data onto the residual components (from n_comp + 1 onwards)
residuals_frompc = pca.transform(fc_concat)[:, n_comp:]

# Reconstruct the data using the residual components
residuals_frompc = np.dot(residuals_frompc, pca.components_[n_comp:, :]) + np.mean(fc_concat, axis=0)

# Print the shape of the reconstructed data
print(f"Residuals from PC matrix shape (n_sessions, n_features): {residuals_frompc.shape}")

Residuals from PC matrix shape (n_sessions, n_features): (36, 7140)

Plot residual matrix¶

Sanity check the reconstruction function¶

In [15]:

Copied!





# Test my reconstruction function
upper_triangle = fc_matrix.values[np.triu_indices_from(fc_matrix, k=0)]
full_matrix = reconstruct_full_matrix(upper_triangle, fc_size)
full_matrix.shape
assert np.array_equal(full_matrix, fc_matrix.values)
# Test my reconstruction function
upper_triangle = fc_matrix.values[np.triu_indices_from(fc_matrix, k=0)]
full_matrix = reconstruct_full_matrix(upper_triangle, fc_size)
full_matrix.shape
assert np.array_equal(full_matrix, fc_matrix.values)

In [16]:

Copied!





residual = reconstruct_full_matrix(residuals[0,:], fc_size)

# Plot the residual matrix as a heatmap
plt.figure(figsize=(10, 8))
sns.heatmap(residual, cmap='viridis')
plt.title('Residual Matrix Heatmap')
plt.show()
residual = reconstruct_full_matrix(residuals[0,:], fc_size)

# Plot the residual matrix as a heatmap
plt.figure(figsize=(10, 8))
sns.heatmap(residual, cmap='viridis')
plt.title('Residual Matrix Heatmap')
plt.show()

In [17]:

Copied!





# Check whether reconstructing the residuals a different way leads to the same result
residual_frompc = reconstruct_full_matrix(residuals_frompc[0,:], fc_size)

# Plot the residual matrix as a heatmap
plt.figure(figsize=(10, 8))
sns.heatmap(residual_frompc, cmap='viridis')
plt.title('Residual Matrix Heatmap')
plt.show()
# Check whether reconstructing the residuals a different way leads to the same result
residual_frompc = reconstruct_full_matrix(residuals_frompc[0,:], fc_size)

# Plot the residual matrix as a heatmap
plt.figure(figsize=(10, 8))
sns.heatmap(residual_frompc, cmap='viridis')
plt.title('Residual Matrix Heatmap')
plt.show()

Plot the original FC, reconstructed FC and residual matrix for each session¶

In [18]:

Copied!





# Iterate over each session
for s in range(fc_concat.shape[0]):
    # Reconstruct the full matrices for original FC, reconstructed FC, and residuals
    original_fc = reconstruct_full_matrix(fc_concat[s, :], fc_size)
    reconstructed_fc = reconstruct_full_matrix(fc_reconstructed[s, :], fc_size)
    residual_fc = reconstruct_full_matrix(residuals[s, :], fc_size)

    # Create a 1x3 subplot
    fig, axes = plt.subplots(1, 3, figsize=(18, 6))
    
    # Plot the original FC matrix
    sns.heatmap(original_fc, cmap='viridis', ax=axes[0])
    axes[0].set_title(f'Session {ses_index[s]}: Original FC', fontsize=14)
    
    # Plot the reconstructed FC matrix
    sns.heatmap(reconstructed_fc, cmap='viridis', ax=axes[1])
    axes[1].set_title(f'Session {ses_index[s]}: Reconstructed FC', fontsize=14)
    
    # Plot the residual matrix
    sns.heatmap(residual_fc, cmap='viridis', ax=axes[2])
    axes[2].set_title(f'Session {ses_index[s]}: Residual', fontsize=14)
    
    # Adjust layout and display the plot
    plt.tight_layout()
    plt.show()
# Iterate over each session
for s in range(fc_concat.shape[0]):
    # Reconstruct the full matrices for original FC, reconstructed FC, and residuals
    original_fc = reconstruct_full_matrix(fc_concat[s, :], fc_size)
    reconstructed_fc = reconstruct_full_matrix(fc_reconstructed[s, :], fc_size)
    residual_fc = reconstruct_full_matrix(residuals[s, :], fc_size)

    # Create a 1x3 subplot
    fig, axes = plt.subplots(1, 3, figsize=(18, 6))
    
    # Plot the original FC matrix
    sns.heatmap(original_fc, cmap='viridis', ax=axes[0])
    axes[0].set_title(f'Session {ses_index[s]}: Original FC', fontsize=14)
    
    # Plot the reconstructed FC matrix
    sns.heatmap(reconstructed_fc, cmap='viridis', ax=axes[1])
    axes[1].set_title(f'Session {ses_index[s]}: Reconstructed FC', fontsize=14)
    
    # Plot the residual matrix
    sns.heatmap(residual_fc, cmap='viridis', ax=axes[2])
    axes[2].set_title(f'Session {ses_index[s]}: Residual', fontsize=14)
    
    # Adjust layout and display the plot
    plt.tight_layout()
    plt.show()

Plot the variance of the residuals over sessions¶

In [19]:

Copied!





def plot_res_variance(residuals, groupedby=False, min_res_var=0, max_res_var=1000, ses_id=None):
    from scipy.optimize import curve_fit
    from sklearn.metrics import r2_score
    
    # Compute the variance of the residuals for each session
    residuals_var = np.var(residuals, axis=1)
    min_res_var = min(min_res_var, min(residuals_var))
    max_res_var = max(max_res_var, max(residuals_var))

    # Plot the variance of the residuals as a function of the session index
    plt.plot(range(len(residuals_var)), residuals_var, marker='o', linestyle='-', label='Variance of Residuals')

    # Fit and plot a negative exponential curve
    def neg_exp(x, a, b, c):
        return a * np.exp(-b * x) + c

    if residuals_var.shape[0] > 3:
        #popt, _ = curve_fit(neg_exp, range(len(residuals_var)), residuals_var, p0=(1, 0.01, 1))
        #neg_exp_r2 = r2_score(residuals_var, neg_exp(range(len(residuals_var)), *popt))
        #plt.plot(range(len(residuals_var)), neg_exp(range(len(residuals_var)), *popt), linestyle='--', label=f'Negative Exponential Fit (R$^2$={neg_exp_r2:.2f})')
        
        # Fit and plot a quadratic curve
        quadratic_model = np.poly1d(np.polyfit(range(len(residuals_var)), residuals_var, 2))
        quadratic_r2 = r2_score(residuals_var, quadratic_model(range(len(residuals_var))))
        plt.plot(range(len(residuals_var)), quadratic_model(range(len(residuals_var))), linestyle='--', label=f'Quadratic Fit (R$^2$={quadratic_r2:.2f})')

    if residuals_var.shape[0] > 1:
        # Fit and plot a linear curve
        linear_model = np.poly1d(np.polyfit(range(len(residuals_var)), residuals_var, 1))
        linear_r2 = r2_score(residuals_var, linear_model(range(len(residuals_var))))
        plt.plot(range(len(residuals_var)), linear_model(range(len(residuals_var))), linestyle='--', label=f'Linear Fit (R$^2$={linear_r2:.2f})')
        
    if ses_id is not None:
        plt.xticks(ticks=np.arange(len(ses_id)), labels=ses_id)
    plt.tick_params(axis='x', labelsize=12)
    plt.tick_params(axis='y', labelsize=12)
    plt.legend()
    if groupedby:
        plt.title(groupedby, fontsize=16)
        
    return min_res_var, max_res_var
   
def plot_res_variance(residuals, groupedby=False, min_res_var=0, max_res_var=1000, ses_id=None):
    from scipy.optimize import curve_fit
    from sklearn.metrics import r2_score
    
    # Compute the variance of the residuals for each session
    residuals_var = np.var(residuals, axis=1)
    min_res_var = min(min_res_var, min(residuals_var))
    max_res_var = max(max_res_var, max(residuals_var))

    # Plot the variance of the residuals as a function of the session index
    plt.plot(range(len(residuals_var)), residuals_var, marker='o', linestyle='-', label='Variance of Residuals')

    # Fit and plot a negative exponential curve
    def neg_exp(x, a, b, c):
        return a * np.exp(-b * x) + c

    if residuals_var.shape[0] > 3:
        #popt, _ = curve_fit(neg_exp, range(len(residuals_var)), residuals_var, p0=(1, 0.01, 1))
        #neg_exp_r2 = r2_score(residuals_var, neg_exp(range(len(residuals_var)), *popt))
        #plt.plot(range(len(residuals_var)), neg_exp(range(len(residuals_var)), *popt), linestyle='--', label=f'Negative Exponential Fit (R$^2$={neg_exp_r2:.2f})')
        
        # Fit and plot a quadratic curve
        quadratic_model = np.poly1d(np.polyfit(range(len(residuals_var)), residuals_var, 2))
        quadratic_r2 = r2_score(residuals_var, quadratic_model(range(len(residuals_var))))
        plt.plot(range(len(residuals_var)), quadratic_model(range(len(residuals_var))), linestyle='--', label=f'Quadratic Fit (R$^2$={quadratic_r2:.2f})')

    if residuals_var.shape[0] > 1:
        # Fit and plot a linear curve
        linear_model = np.poly1d(np.polyfit(range(len(residuals_var)), residuals_var, 1))
        linear_r2 = r2_score(residuals_var, linear_model(range(len(residuals_var))))
        plt.plot(range(len(residuals_var)), linear_model(range(len(residuals_var))), linestyle='--', label=f'Linear Fit (R$^2$={linear_r2:.2f})')
        
    if ses_id is not None:
        plt.xticks(ticks=np.arange(len(ses_id)), labels=ses_id)
    plt.tick_params(axis='x', labelsize=12)
    plt.tick_params(axis='y', labelsize=12)
    plt.legend()
    if groupedby:
        plt.title(groupedby, fontsize=16)
        
    return min_res_var, max_res_var
   

In [20]:

Copied!





# Exclude 30th column
#residuals_wo_outlier = np.delete(residuals, 30, axis=1)

plt.figure(figsize=(10, 6))

# Plot with residuals
#plt.subplot(1, 2, 1)
plot_res_variance(residuals)
plt.xticks(ticks=np.arange(len(ses_index)), labels=ses_index, rotation=90)
plt.xlabel('Session Index', fontsize=16)
plt.ylabel('Variance of Residuals', fontsize=16)
#plt.title('All Sessions', fontsize=16)
#plt.ylim(0.06, 0.24)

"""
# Originally in the abstract I had investigated the effect of excluding the outlier session on the variance of the residuals.
# Plot with residuals_wo_outlier
plt.subplot(1, 2, 2)
plot_res_variance(residuals_wo_outlier)
#plot_res_variance(residuals)
plt.xticks(ticks=np.arange(len(ses_index)-1), labels=np.delete(ses_index, 30), rotation=90)
plt.xlabel('Session Index', fontsize=14)
plt.ylabel('Variance of Residuals', fontsize=14)
plt.title('Excluding the Outlier', fontsize=16)
#plt.ylim(0.06, 0.24)
"""


plt.tight_layout()
plt.show()
# Exclude 30th column
#residuals_wo_outlier = np.delete(residuals, 30, axis=1)

plt.figure(figsize=(10, 6))

# Plot with residuals
#plt.subplot(1, 2, 1)
plot_res_variance(residuals)
plt.xticks(ticks=np.arange(len(ses_index)), labels=ses_index, rotation=90)
plt.xlabel('Session Index', fontsize=16)
plt.ylabel('Variance of Residuals', fontsize=16)
#plt.title('All Sessions', fontsize=16)
#plt.ylim(0.06, 0.24)

"""
# Originally in the abstract I had investigated the effect of excluding the outlier session on the variance of the residuals.
# Plot with residuals_wo_outlier
plt.subplot(1, 2, 2)
plot_res_variance(residuals_wo_outlier)
#plot_res_variance(residuals)
plt.xticks(ticks=np.arange(len(ses_index)-1), labels=np.delete(ses_index, 30), rotation=90)
plt.xlabel('Session Index', fontsize=14)
plt.ylabel('Variance of Residuals', fontsize=14)
plt.title('Excluding the Outlier', fontsize=16)
#plt.ylim(0.06, 0.24)
"""


plt.tight_layout()
plt.show()

Plot edgewise residual over sessions¶

In [21]:

Copied!





def plot_edgewise_residual(residuals, groupedby=False, ses_id=None):
    # Calculate the interquartile range (IQR)
    q1 = np.percentile(residuals, 25, axis=1)
    q3 = np.percentile(residuals, 75, axis=1)

    # Plot the evolution of the residual for each edge
    for i in range(residuals.shape[1]):
        plt.plot(range(residuals.shape[0]), residuals[:,i], color='blue', alpha=0.1)

    # Plot the inter-quartile
    plt.plot(range(residuals.shape[0]), q1, color='orange', label='25th percentile')
    plt.plot(range(residuals.shape[0]), q3, color='orange', label='75th percentile')
    if ses_id is not None:
        plt.xticks(ticks=np.arange(len(ses_id)), labels=ses_id)
    plt.tick_params(axis='x', labelsize=12)
    plt.tick_params(axis='y', labelsize=12)
    plt.legend()
    if groupedby:
        plt.title(groupedby, fontsize=16)
def plot_edgewise_residual(residuals, groupedby=False, ses_id=None):
    # Calculate the interquartile range (IQR)
    q1 = np.percentile(residuals, 25, axis=1)
    q3 = np.percentile(residuals, 75, axis=1)

    # Plot the evolution of the residual for each edge
    for i in range(residuals.shape[1]):
        plt.plot(range(residuals.shape[0]), residuals[:,i], color='blue', alpha=0.1)

    # Plot the inter-quartile
    plt.plot(range(residuals.shape[0]), q1, color='orange', label='25th percentile')
    plt.plot(range(residuals.shape[0]), q3, color='orange', label='75th percentile')
    if ses_id is not None:
        plt.xticks(ticks=np.arange(len(ses_id)), labels=ses_id)
    plt.tick_params(axis='x', labelsize=12)
    plt.tick_params(axis='y', labelsize=12)
    plt.legend()
    if groupedby:
        plt.title(groupedby, fontsize=16)

In [22]:

Copied!





plot_edgewise_residual(residuals)
plt.xlabel('Session Index', fontsize=14)
plt.ylabel('Residual Value', fontsize=14)
plt.show()
plot_edgewise_residual(residuals)
plt.xlabel('Session Index', fontsize=14)
plt.ylabel('Residual Value', fontsize=14)
plt.show()

Retrieve confounds¶

Retrieve scan time and day of week from the `scans.tsv` in the dataset repo¶

In [38]:

Copied!





# Retrieve the confounds from the scans.tsv
confounds_of_interest = [
    'caffeine_2h', 
    'caffeine_24h'
]

scans_df = get_confounds(dataset_path, confound_path, confounds_of_interest, iqms_path=iqms_path)

# Keep only the magnitude part of functional scans and first echo
scans_df = scans_df[scans_df["modality"] == "func"]

# Create dataframe based on the list of files
files_df = pd.DataFrame(files, columns=['file_path'])
files_df = files_df.assign(
    subject=files_df["file_path"].str.extract(r"sub-(\d+)/"),
    session=files_df["file_path"].str.extract(r"ses-(\w+)/"),
    task=files_df["file_path"].str.extract(r"task-(\w+)_"),
)

#Pair filenames with acquisition time
confounds_df = pd.merge(
        scans_df, files_df, on=["subject", "session", "task"], how="inner"
    )
confounds_df.drop(columns=["file_path"], inplace=True)

print(confounds_df.head())
# Retrieve the confounds from the scans.tsv
confounds_of_interest = [
    'caffeine_2h', 
    'caffeine_24h'
]

scans_df = get_confounds(dataset_path, confound_path, confounds_of_interest, iqms_path=iqms_path)

# Keep only the magnitude part of functional scans and first echo
scans_df = scans_df[scans_df["modality"] == "func"]

# Create dataframe based on the list of files
files_df = pd.DataFrame(files, columns=['file_path'])
files_df = files_df.assign(
    subject=files_df["file_path"].str.extract(r"sub-(\d+)/"),
    session=files_df["file_path"].str.extract(r"ses-(\w+)/"),
    task=files_df["file_path"].str.extract(r"task-(\w+)_"),
)

#Pair filenames with acquisition time
confounds_df = pd.merge(
        scans_df, files_df, on=["subject", "session", "task"], how="inner"
    )
confounds_df.drop(columns=["file_path"], inplace=True)

print(confounds_df.head())

Warning: Only the IQMs corresponding to the second echo are retained in the IQMs DataFrame.
                                            filename  \
0  sub-001/ses-001/func/sub-001_ses-001_task-rsmo...   
1  sub-001/ses-003/func/sub-001_ses-003_task-rsmo...   
2  sub-001/ses-004/func/sub-001_ses-004_task-rsmo...   
3  sub-001/ses-005/func/sub-001_ses-005_task-rsmo...   
4  sub-001/ses-006/func/sub-001_ses-006_task-rsmo...   

                     acq_time subject session modality     task pe_dir  \
0  2023-10-20T19:50:01.187500     001     001     func  rsmovie     LR   
1  2023-10-21T10:12:32.207500     001     003     func  rsmovie     LR   
2  2023-10-21T12:22:08.222500     001     004     func  rsmovie     RL   
3  2023-10-22T10:33:53.380000     001     005     func  rsmovie     PA   
4  2023-10-22T12:18:06.200000     001     006     func  rsmovie     PA   

  day_of_week time_of_day   fd_mean  caffeine_2h  caffeine_24h  
0      Friday    20:00:00  0.119245          0.0           2.0  
1    Saturday    10:00:00  0.096035          1.0           1.0  
2    Saturday    12:00:00  0.146882          0.0           1.0  
3      Sunday    11:00:00  0.112293          1.0           1.0  
4      Sunday    12:00:00  0.088701          1.0           1.0

In [39]:

Copied!





# Merge residuals with confounds_df to get the time_of_day for each session
residuals_df = pd.DataFrame(residuals)
print("Shape of residuals array (n_samples, n_features):", residuals.shape)
residuals_df = residuals_df.assign(
    session = files_df['session'],
)
residuals_df = pd.merge(
    residuals_df, 
    confounds_df[['session', 'day_of_week', 'time_of_day', 'pe_dir', 'caffeine_2h', 'caffeine_24h']], 
    on='session'
)
print("Shape of residuals_df after merging with confounds_df:", residuals_df.shape)
print("First five rows of residuals_df after merge:")
print(residuals_df.head())
# Merge residuals with confounds_df to get the time_of_day for each session
residuals_df = pd.DataFrame(residuals)
print("Shape of residuals array (n_samples, n_features):", residuals.shape)
residuals_df = residuals_df.assign(
    session = files_df['session'],
)
residuals_df = pd.merge(
    residuals_df, 
    confounds_df[['session', 'day_of_week', 'time_of_day', 'pe_dir', 'caffeine_2h', 'caffeine_24h']], 
    on='session'
)
print("Shape of residuals_df after merging with confounds_df:", residuals_df.shape)
print("First five rows of residuals_df after merge:")
print(residuals_df.head())

Shape of residuals array (n_samples, n_features): (36, 7140)
Shape of residuals_df after merging with confounds_df: (36, 7146)
First five rows of residuals_df after merge:
              0         1         2         3         4         5         6  \
0 -1.731915e-15  0.008568 -0.014353 -0.080859 -0.002130 -0.012745 -0.011353   
1 -1.081627e-15  0.001091  0.020877 -0.020616  0.032127  0.067218  0.034650   
2 -8.339862e-16  0.016150 -0.056996 -0.079078 -0.110628 -0.012115  0.010908   
3  8.614996e-17  0.013174 -0.056856 -0.045357  0.031062  0.009405 -0.018774   
4 -8.310795e-16 -0.030164  0.134168  0.060146  0.034667  0.028640  0.026211   

          7         8         9  ...      7136  7137      7138  7139  session  \
0  0.002458  0.055776  0.125133  ...  0.026551   0.0  0.031856   0.0      001   
1  0.029521 -0.033804 -0.107729  ...  0.031296   0.0  0.035877   0.0      003   
2  0.011550  0.001944  0.015197  ...  0.000338   0.0 -0.024277   0.0      004   
3 -0.024694  0.006120 -0.001600  ... -0.009069   0.0  0.026941   0.0      005   
4  0.040799 -0.000657 -0.049267  ... -0.041234   0.0 -0.006108   0.0      006   

   day_of_week  time_of_day  pe_dir  caffeine_2h  caffeine_24h  
0       Friday     20:00:00      LR          0.0           2.0  
1     Saturday     10:00:00      LR          1.0           1.0  
2     Saturday     12:00:00      RL          0.0           1.0  
3       Sunday     11:00:00      PA          1.0           1.0  
4       Sunday     12:00:00      PA          1.0           1.0  

[5 rows x 7146 columns]

In [40]:

Copied!





res = residuals_df.drop(columns=['session', 'day_of_week', 'time_of_day','pe_dir']).values.T
residuals_df = residuals_df.assign(
    variance = np.var(res, axis=0)
)
# Cast strings to type compatible with statistical models
residuals_df["session"] = pd.to_numeric(residuals_df["session"])
residuals_df["caffeine_2h"] = pd.to_numeric(residuals_df["caffeine_2h"])
residuals_df["caffeine_24h"] = pd.to_numeric(residuals_df["caffeine_24h"])
residuals_df["day_of_week"] = residuals_df["day_of_week"].astype("category")
residuals_df["time_of_day"] = residuals_df["time_of_day"].astype("category")
residuals_df["pe_dir"] = residuals_df["pe_dir"].astype("category")
print(residuals_df.head())

print("Here are the characteristics of the session with the highest variance:")
max_variance_row = residuals_df.loc[residuals_df['variance'].idxmax()]
print(max_variance_row)
residuals_wo_outlier_df = residuals_df.drop(max_variance_row.name)
res = residuals_df.drop(columns=['session', 'day_of_week', 'time_of_day','pe_dir']).values.T
residuals_df = residuals_df.assign(
    variance = np.var(res, axis=0)
)
# Cast strings to type compatible with statistical models
residuals_df["session"] = pd.to_numeric(residuals_df["session"])
residuals_df["caffeine_2h"] = pd.to_numeric(residuals_df["caffeine_2h"])
residuals_df["caffeine_24h"] = pd.to_numeric(residuals_df["caffeine_24h"])
residuals_df["day_of_week"] = residuals_df["day_of_week"].astype("category")
residuals_df["time_of_day"] = residuals_df["time_of_day"].astype("category")
residuals_df["pe_dir"] = residuals_df["pe_dir"].astype("category")
print(residuals_df.head())

print("Here are the characteristics of the session with the highest variance:")
max_variance_row = residuals_df.loc[residuals_df['variance'].idxmax()]
print(max_variance_row)
residuals_wo_outlier_df = residuals_df.drop(max_variance_row.name)

              0         1         2         3         4         5         6  \
0 -1.731915e-15  0.008568 -0.014353 -0.080859 -0.002130 -0.012745 -0.011353   
1 -1.081627e-15  0.001091  0.020877 -0.020616  0.032127  0.067218  0.034650   
2 -8.339862e-16  0.016150 -0.056996 -0.079078 -0.110628 -0.012115  0.010908   
3  8.614996e-17  0.013174 -0.056856 -0.045357  0.031062  0.009405 -0.018774   
4 -8.310795e-16 -0.030164  0.134168  0.060146  0.034667  0.028640  0.026211   

          7         8         9  ...  7137      7138  7139  session  \
0  0.002458  0.055776  0.125133  ...   0.0  0.031856   0.0        1   
1  0.029521 -0.033804 -0.107729  ...   0.0  0.035877   0.0        3   
2  0.011550  0.001944  0.015197  ...   0.0 -0.024277   0.0        4   
3 -0.024694  0.006120 -0.001600  ...   0.0  0.026941   0.0        5   
4  0.040799 -0.000657 -0.049267  ...   0.0 -0.006108   0.0        6   

   day_of_week  time_of_day  pe_dir  caffeine_2h  caffeine_24h  variance  
0       Friday     20:00:00      LR          0.0           2.0  0.002463  
1     Saturday     10:00:00      LR          1.0           1.0  0.002472  
2     Saturday     12:00:00      RL          0.0           1.0  0.002126  
3       Sunday     11:00:00      PA          1.0           1.0  0.001782  
4       Sunday     12:00:00      PA          1.0           1.0  0.002011  

[5 rows x 7147 columns]
Here are the characteristics of the session with the highest variance:
0                    0.0
1                -0.0818
2               0.034087
3              -0.179126
4              -0.102308
                  ...   
time_of_day     20:00:00
pe_dir                RL
caffeine_2h          0.0
caffeine_24h         2.0
variance        0.004153
Name: 21, Length: 7147, dtype: object

Test habituation significance¶

In [41]:

Copied!





# Fit the linear mixed effects model
X = sm.add_constant(residuals_df["session"]) # Adds the intercept term
y = residuals_df["variance"]
model = sm.OLS(y, X).fit()
print(model.summary())
# Fit the linear mixed effects model
X = sm.add_constant(residuals_df["session"]) # Adds the intercept term
y = residuals_df["variance"]
model = sm.OLS(y, X).fit()
print(model.summary())

                            OLS Regression Results                            
==============================================================================
Dep. Variable:               variance   R-squared:                       0.016
Model:                            OLS   Adj. R-squared:                 -0.013
Method:                 Least Squares   F-statistic:                    0.5514
Date:                Wed, 08 Apr 2026   Prob (F-statistic):              0.463
Time:                        11:40:22   Log-Likelihood:                 217.20
No. Observations:                  36   AIC:                            -430.4
Df Residuals:                      34   BIC:                            -427.2
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0027      0.000     14.080      0.000       0.002       0.003
session     4.997e-06   6.73e-06      0.743      0.463   -8.68e-06    1.87e-05
==============================================================================
Omnibus:                        1.571   Durbin-Watson:                   1.210
Prob(Omnibus):                  0.456   Jarque-Bera (JB):                1.178
Skew:                           0.441   Prob(JB):                        0.555
Kurtosis:                       2.906   Cond. No.                         53.5
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

In [42]:

Copied!





# #Statistics excluding the outlier
# # Fit the linear mixed effects model
# X = sm.add_constant(residuals_wo_outlier_df["session"]) # Adds the intercept term
# y = residuals_wo_outlier_df["variance"]
# model_wo_outlier = sm.OLS(y, X).fit()
# print(model_wo_outlier.summary())
# #Statistics excluding the outlier
# # Fit the linear mixed effects model
# X = sm.add_constant(residuals_wo_outlier_df["session"]) # Adds the intercept term
# y = residuals_wo_outlier_df["variance"]
# model_wo_outlier = sm.OLS(y, X).fit()
# print(model_wo_outlier.summary())

In [43]:

Copied!

# from statsmodels.stats.multitest import multipletests

# # Assuming you have a list of p-values from multiple tests
# p_values = [model.pvalues["session"], model_wo_outlier.pvalues["session"]]

# # Apply Bonferroni correction
# corrected_p_values = multipletests(p_values, method='bonferroni')[1]

# print(f"Original p-values: {p_values}")
# print(f"Corrected p-values: {corrected_p_values}")
# from statsmodels.stats.multitest import multipletests

# # Assuming you have a list of p-values from multiple tests
# p_values = [model.pvalues["session"], model_wo_outlier.pvalues["session"]]

# # Apply Bonferroni correction
# corrected_p_values = multipletests(p_values, method='bonferroni')[1]

# print(f"Original p-values: {p_values}")
# print(f"Corrected p-values: {corrected_p_values}")

Plot residuals grouped by bins of confound factor¶

Day of week¶

In [ ]:

Copied!





# Entities specific to this plot
entities = {'visualization': 'resvar', 'confound': 'day'}
entities.update(entities_base)

# Create figure with subplots for variance and edgewise residuals
fig, axes = plt.subplots(3, 3, figsize=(20, 20))
min_res_var = 1000
max_res_var = 0
for i, (day_of_week, group) in enumerate(residuals_df.groupby('day_of_week', observed=False)):
    plt.sca(axes[i // 3, i % 3])
    ses_id = group['session'].apply(lambda x: str(x).zfill(3)).values
    res = group.drop(columns=['session', 'day_of_week', 'time_of_day','pe_dir','caffeine_2h', 'caffeine_24h']).values
    min_res_var, max_res_var = plot_res_variance(res, groupedby=day_of_week, min_res_var=min_res_var, max_res_var=max_res_var, ses_id=ses_id)

#Set same y-axis limits for all subplots
for ax in axes.flat:
    ax.set_ylim(min_res_var-0.0005, max_res_var+0.0005)
    
# Hide the last two subplots (axes[2, 1] and axes[2, 2])
axes[2, 1].axis('off')
axes[2, 2].axis('off')
fig.text(0.5, 0.08, 'Session Index', ha='center', fontsize=14)
fig.text(0.08, 0.5, 'Variance of Residuals', va='center', rotation='vertical', fontsize=14)
save_path = save_layout.build_path(entities, validate=False)
plt.savefig(save_path)
plt.show()

caption = f"**Residual variance evolution separated by day of week.**" \ 
"We fitted negative exponential and quadratic curves only if the plot had more than three points."
save_caption(caption, save_layout, entities)
# Entities specific to this plot
entities = {'visualization': 'resvar', 'confound': 'day'}
entities.update(entities_base)

# Create figure with subplots for variance and edgewise residuals
fig, axes = plt.subplots(3, 3, figsize=(20, 20))
min_res_var = 1000
max_res_var = 0
for i, (day_of_week, group) in enumerate(residuals_df.groupby('day_of_week', observed=False)):
    plt.sca(axes[i // 3, i % 3])
    ses_id = group['session'].apply(lambda x: str(x).zfill(3)).values
    res = group.drop(columns=['session', 'day_of_week', 'time_of_day','pe_dir','caffeine_2h', 'caffeine_24h']).values
    min_res_var, max_res_var = plot_res_variance(res, groupedby=day_of_week, min_res_var=min_res_var, max_res_var=max_res_var, ses_id=ses_id)

#Set same y-axis limits for all subplots
for ax in axes.flat:
    ax.set_ylim(min_res_var-0.0005, max_res_var+0.0005)
    
# Hide the last two subplots (axes[2, 1] and axes[2, 2])
axes[2, 1].axis('off')
axes[2, 2].axis('off')
fig.text(0.5, 0.08, 'Session Index', ha='center', fontsize=14)
fig.text(0.08, 0.5, 'Variance of Residuals', va='center', rotation='vertical', fontsize=14)
save_path = save_layout.build_path(entities, validate=False)
plt.savefig(save_path)
plt.show()

caption = f"**Residual variance evolution separated by day of week.**" \ 
"We fitted negative exponential and quadratic curves only if the plot had more than three points."
save_caption(caption, save_layout, entities)

Time of day¶

In [ ]:

Copied!





# Entities specific to this plot
entities = {'visualization': 'resvar', 'confound': 'time'}
entities.update(entities_base)

# Create figure with subplots for variance and edgewise residuals
fig, axes = plt.subplots(3, 4, figsize=(30, 20))
min_res_var = 1000
max_res_var = 0
for i, (time_of_day, group) in enumerate(residuals_df.groupby('time_of_day', observed=False)):
    print("Time of day:", time_of_day)
    plt.sca(axes[i // 4, i % 4])
    ses_id = group['session'].apply(lambda x: str(x).zfill(3)).values
    res = group.drop(columns=['session', 'day_of_week', 'time_of_day','pe_dir','caffeine_2h', 'caffeine_24h']).values
    print("Shape of residuals:", res.shape)
    if res.shape[1] > 1:
        min_res_var, max_res_var = plot_res_variance(res, groupedby=time_of_day, min_res_var=min_res_var, max_res_var=max_res_var, ses_id=ses_id)

#Set same y-axis limits for all subplots
for ax in axes.flat:
    ax.set_ylim(min_res_var-0.01, max_res_var+0.01)

 # Hide the last axis
axes[2, 2].axis('off')
axes[2, 3].axis('off')
fig.text(0.5, 0.08, 'Session Index', ha='center', fontsize=14)
fig.text(0.08, 0.5, 'Variance of Residuals', va='center', rotation='vertical', fontsize=14)
save_path = save_layout.build_path(entities, validate=False)
plt.savefig(save_path)
plt.show()

caption = f"**Residual variance evolution separated by time of day.**"
save_caption(caption, save_layout, entities)
# Entities specific to this plot
entities = {'visualization': 'resvar', 'confound': 'time'}
entities.update(entities_base)

# Create figure with subplots for variance and edgewise residuals
fig, axes = plt.subplots(3, 4, figsize=(30, 20))
min_res_var = 1000
max_res_var = 0
for i, (time_of_day, group) in enumerate(residuals_df.groupby('time_of_day', observed=False)):
    print("Time of day:", time_of_day)
    plt.sca(axes[i // 4, i % 4])
    ses_id = group['session'].apply(lambda x: str(x).zfill(3)).values
    res = group.drop(columns=['session', 'day_of_week', 'time_of_day','pe_dir','caffeine_2h', 'caffeine_24h']).values
    print("Shape of residuals:", res.shape)
    if res.shape[1] > 1:
        min_res_var, max_res_var = plot_res_variance(res, groupedby=time_of_day, min_res_var=min_res_var, max_res_var=max_res_var, ses_id=ses_id)

#Set same y-axis limits for all subplots
for ax in axes.flat:
    ax.set_ylim(min_res_var-0.01, max_res_var+0.01)

 # Hide the last axis
axes[2, 2].axis('off')
axes[2, 3].axis('off')
fig.text(0.5, 0.08, 'Session Index', ha='center', fontsize=14)
fig.text(0.08, 0.5, 'Variance of Residuals', va='center', rotation='vertical', fontsize=14)
save_path = save_layout.build_path(entities, validate=False)
plt.savefig(save_path)
plt.show()

caption = f"**Residual variance evolution separated by time of day.**"
save_caption(caption, save_layout, entities)

Time of day: 09:00:00
Shape of residuals: (1, 7141)
Time of day: 10:00:00
Shape of residuals: (2, 7141)
Time of day: 11:00:00
Shape of residuals: (2, 7141)
Time of day: 12:00:00
Shape of residuals: (3, 7141)
Time of day: 18:00:00
Shape of residuals: (2, 7141)
Time of day: 19:00:00
Shape of residuals: (2, 7141)
Time of day: 20:00:00
Shape of residuals: (11, 7141)
Time of day: 21:00:00
Shape of residuals: (5, 7141)
Time of day: 22:00:00
Shape of residuals: (7, 7141)
Time of day: 23:00:00
Shape of residuals: (1, 7141)

Phase Encoding Direction¶

In [ ]:

Copied!





# Entities specific to this plot
entities = {'visualization': 'resvar', 'confound': 'pe_dir'}
entities.update(entities_base)

# Create figure with subplots for variance and edgewise residuals
fig, axes = plt.subplots(2, 2, figsize=(12, 12))
min_res_var = 1000
max_res_var = 0
for i, (pe_dir, group) in enumerate(residuals_df.groupby('pe_dir', observed=False)):
    plt.sca(axes[i // 2, i % 2])
    ses_id = group['session'].apply(lambda x: str(x).zfill(3)).values
    res = group.drop(columns=['session', 'day_of_week', 'time_of_day','pe_dir','caffeine_2h', 'caffeine_24h']).values
    min_res_var, max_res_var = plot_res_variance(res, groupedby=pe_dir, min_res_var=min_res_var, max_res_var=max_res_var, ses_id=ses_id)

#Set same y-axis limits for all subplots
for ax in axes.flat:
    ax.set_ylim(min_res_var-0.0005, max_res_var+0.0005)

fig.text(0.5, 0.05, 'Session Index', ha='center', fontsize=14)
fig.text(0.05, 0.5, 'Variance of Residuals', va='center', rotation='vertical', fontsize=14)
save_path = save_layout.build_path(entities, validate=False)
plt.savefig(save_path)
plt.show()

caption = f"**Residual variance evolution separated by phase encoding direction.**"
save_caption(caption, save_layout, entities)
# Entities specific to this plot
entities = {'visualization': 'resvar', 'confound': 'pe_dir'}
entities.update(entities_base)

# Create figure with subplots for variance and edgewise residuals
fig, axes = plt.subplots(2, 2, figsize=(12, 12))
min_res_var = 1000
max_res_var = 0
for i, (pe_dir, group) in enumerate(residuals_df.groupby('pe_dir', observed=False)):
    plt.sca(axes[i // 2, i % 2])
    ses_id = group['session'].apply(lambda x: str(x).zfill(3)).values
    res = group.drop(columns=['session', 'day_of_week', 'time_of_day','pe_dir','caffeine_2h', 'caffeine_24h']).values
    min_res_var, max_res_var = plot_res_variance(res, groupedby=pe_dir, min_res_var=min_res_var, max_res_var=max_res_var, ses_id=ses_id)

#Set same y-axis limits for all subplots
for ax in axes.flat:
    ax.set_ylim(min_res_var-0.0005, max_res_var+0.0005)

fig.text(0.5, 0.05, 'Session Index', ha='center', fontsize=14)
fig.text(0.05, 0.5, 'Variance of Residuals', va='center', rotation='vertical', fontsize=14)
save_path = save_layout.build_path(entities, validate=False)
plt.savefig(save_path)
plt.show()

caption = f"**Residual variance evolution separated by phase encoding direction.**"
save_caption(caption, save_layout, entities)

In [87]:

Copied!





# Entities specific to this plot
entities = {'visualization': 'edgewisevar', 'confound': 'pe_dir'}
entities.update(entities_base)

# Create figure with subplots for variance and edgewise residuals
fig, axes = plt.subplots(2, 2, figsize=(12, 12))
min_res_var = 1000
max_res_var = 0
for i, (pe_dir, group) in enumerate(residuals_df.groupby('pe_dir', observed=False)):
    plt.sca(axes[i // 2, i % 2])
    ses_id = group['session'].apply(lambda x: str(x).zfill(3)).values
    res = group.drop(columns=['session', 'day_of_week', 'time_of_day','pe_dir','caffeine_2h', 'caffeine_24h']).values
    plot_edgewise_residual(res, groupedby=pe_dir, ses_id=ses_id)

fig.text(0.5, 0.05, 'Session Index', ha='center', fontsize=14)
fig.text(0.05, 0.5, 'Variance of Residuals', va='center', rotation='vertical', fontsize=14)
save_path = save_layout.build_path(entities, validate=False)
plt.savefig(save_path)
plt.show()
# Entities specific to this plot
entities = {'visualization': 'edgewisevar', 'confound': 'pe_dir'}
entities.update(entities_base)

# Create figure with subplots for variance and edgewise residuals
fig, axes = plt.subplots(2, 2, figsize=(12, 12))
min_res_var = 1000
max_res_var = 0
for i, (pe_dir, group) in enumerate(residuals_df.groupby('pe_dir', observed=False)):
    plt.sca(axes[i // 2, i % 2])
    ses_id = group['session'].apply(lambda x: str(x).zfill(3)).values
    res = group.drop(columns=['session', 'day_of_week', 'time_of_day','pe_dir','caffeine_2h', 'caffeine_24h']).values
    plot_edgewise_residual(res, groupedby=pe_dir, ses_id=ses_id)

fig.text(0.5, 0.05, 'Session Index', ha='center', fontsize=14)
fig.text(0.05, 0.5, 'Variance of Residuals', va='center', rotation='vertical', fontsize=14)
save_path = save_layout.build_path(entities, validate=False)
plt.savefig(save_path)
plt.show()

/tmp/ipykernel_538929/2248055642.py:18: UserWarning: Creating legend with loc="best" can be slow with large amounts of data.
  plt.savefig(save_path)
/home/cprovins/miniconda3/lib/python3.12/site-packages/IPython/core/pylabtools.py:170: UserWarning: Creating legend with loc="best" can be slow with large amounts of data.
  fig.canvas.print_figure(bytes_io, **kw)

Caffeine consumption 2h before¶

In [88]:

Copied!





# Entities specific to this plot
entities = {'visualization': 'resvar', 'confound': 'caffeine2h'}
entities.update(entities_base)

# Create figure with subplots for variance and edgewise residuals
fig, axes = plt.subplots(1, 2, figsize=(21, 6))
min_res_var = 1000
max_res_var = 0
for i, (caf, group) in enumerate(residuals_df.groupby('caffeine_2h')):
    plt.sca(axes[i])
    ses_id = group['session'].apply(lambda x: str(x).zfill(3)).values
    res = group.drop(columns=['session', 'day_of_week', 'time_of_day','pe_dir','caffeine_2h', 'caffeine_24h']).values
    min_res_var, max_res_var = plot_res_variance(res, groupedby=caf, min_res_var=min_res_var, max_res_var=max_res_var, ses_id=ses_id)

#Set same y-axis limits for all subplots
for ax in axes.flat:
    ax.set_ylim(min_res_var-0.0001, max_res_var+0.0001)

# Set x-ticks with 90 degree rotation for the first subplot
axes[0].set_xticklabels(axes[0].get_xticklabels(), rotation=90)
axes[0].set_title("0 cup of coffee")
axes[1].set_title("1 cup of coffee")

fig.text(0.5, 0.05, 'Session Index', ha='center', fontsize=14)
fig.text(0.05, 0.5, 'Variance of Residuals', va='center', rotation='vertical', fontsize=14)
save_path = save_layout.build_path(entities, validate=False)
plt.savefig(save_path)
plt.show()

caption = f"**Residual variance evolution separated by caffeine consumption 2h before.**"
save_caption(caption, save_layout, entities)
# Entities specific to this plot
entities = {'visualization': 'resvar', 'confound': 'caffeine2h'}
entities.update(entities_base)

# Create figure with subplots for variance and edgewise residuals
fig, axes = plt.subplots(1, 2, figsize=(21, 6))
min_res_var = 1000
max_res_var = 0
for i, (caf, group) in enumerate(residuals_df.groupby('caffeine_2h')):
    plt.sca(axes[i])
    ses_id = group['session'].apply(lambda x: str(x).zfill(3)).values
    res = group.drop(columns=['session', 'day_of_week', 'time_of_day','pe_dir','caffeine_2h', 'caffeine_24h']).values
    min_res_var, max_res_var = plot_res_variance(res, groupedby=caf, min_res_var=min_res_var, max_res_var=max_res_var, ses_id=ses_id)

#Set same y-axis limits for all subplots
for ax in axes.flat:
    ax.set_ylim(min_res_var-0.0001, max_res_var+0.0001)

# Set x-ticks with 90 degree rotation for the first subplot
axes[0].set_xticklabels(axes[0].get_xticklabels(), rotation=90)
axes[0].set_title("0 cup of coffee")
axes[1].set_title("1 cup of coffee")

fig.text(0.5, 0.05, 'Session Index', ha='center', fontsize=14)
fig.text(0.05, 0.5, 'Variance of Residuals', va='center', rotation='vertical', fontsize=14)
save_path = save_layout.build_path(entities, validate=False)
plt.savefig(save_path)
plt.show()

caption = f"**Residual variance evolution separated by caffeine consumption 2h before.**"
save_caption(caption, save_layout, entities)

In [ ]:

Copied!





# Entities specific to this plot
entities = {'visualization': 'edgewisevar', 'confound': 'caffeine2h'}
entities.update(entities_base)

# Create figure with subplots for variance and edgewise residuals
fig, axes = plt.subplots(1, 2, figsize=(19, 6))
min_res_var = 1000
max_res_var = 0
for i, (caf, group) in enumerate(residuals_df.groupby('caffeine_2h')):
    plt.sca(axes[i])
    ses_id = group['session'].apply(lambda x: str(x).zfill(3)).values
    res = group.drop(columns=['session', 'day_of_week', 'time_of_day','pe_dir','caffeine_2h', 'caffeine_24h']).values
    plot_edgewise_residual(res, groupedby=caf, ses_id= ses_id)

# Set x-ticks with 90 degree rotation for the first subplot
axes[0].set_xticklabels(axes[0].get_xticklabels(), rotation=90)
axes[0].set_title("0 cup of coffee")
axes[1].set_title("1 cup of coffee")

fig.text(0.5, 0.05, 'Session Index', ha='center', fontsize=14)
fig.text(0.05, 0.5, 'Variance of Residuals', va='center', rotation='vertical', fontsize=14)
save_path = save_layout.build_path(entities, validate=False)
plt.savefig(save_path)
plt.show()

caption = f"**Edgewise variance evolution separated by caffeine consumption 2h before.**"
save_caption(caption, save_layout, entities)
# Entities specific to this plot
entities = {'visualization': 'edgewisevar', 'confound': 'caffeine2h'}
entities.update(entities_base)

# Create figure with subplots for variance and edgewise residuals
fig, axes = plt.subplots(1, 2, figsize=(19, 6))
min_res_var = 1000
max_res_var = 0
for i, (caf, group) in enumerate(residuals_df.groupby('caffeine_2h')):
    plt.sca(axes[i])
    ses_id = group['session'].apply(lambda x: str(x).zfill(3)).values
    res = group.drop(columns=['session', 'day_of_week', 'time_of_day','pe_dir','caffeine_2h', 'caffeine_24h']).values
    plot_edgewise_residual(res, groupedby=caf, ses_id= ses_id)

# Set x-ticks with 90 degree rotation for the first subplot
axes[0].set_xticklabels(axes[0].get_xticklabels(), rotation=90)
axes[0].set_title("0 cup of coffee")
axes[1].set_title("1 cup of coffee")

fig.text(0.5, 0.05, 'Session Index', ha='center', fontsize=14)
fig.text(0.05, 0.5, 'Variance of Residuals', va='center', rotation='vertical', fontsize=14)
save_path = save_layout.build_path(entities, validate=False)
plt.savefig(save_path)
plt.show()

caption = f"**Edgewise variance evolution separated by caffeine consumption 2h before.**"
save_caption(caption, save_layout, entities)

Caffeine consumption 24h before¶

In [90]:

Copied!





# Entities specific to this plot
entities = {'visualization': 'resvar', 'confound': 'caffeine24h'}
entities.update(entities_base)

# Create figure with subplots for variance and edgewise residuals
fig, axes = plt.subplots(1, 2, figsize=(16, 7))
min_res_var = 1000
max_res_var = 0
for i, (caf, group) in enumerate(residuals_df.groupby('caffeine_24h')):
    plt.sca(axes[i])
    ses_id = group['session'].apply(lambda x: str(x).zfill(3)).values
    res = group.drop(columns=['session', 'day_of_week', 'time_of_day','pe_dir','caffeine_2h', 'caffeine_24h']).values
    min_res_var, max_res_var = plot_res_variance(res, groupedby=caf, min_res_var=min_res_var, max_res_var=max_res_var, ses_id=ses_id)

#Set same y-axis limits for all subplots
for ax in axes.flat:
    ax.set_ylim(min_res_var-0.0005, max_res_var+0.0005)

# Set x-ticks with 90 degree rotation for the first subplot
axes[1].set_xticklabels(axes[1].get_xticklabels(), rotation=90)
axes[0].set_title("1 cup of coffee")
axes[1].set_title("2 cups of coffee")

fig.text(0.5, 0.05, 'Session Index', ha='center', fontsize=14)
fig.text(0.05, 0.5, 'Variance of Residuals', va='center', rotation='vertical', fontsize=14)
save_path = save_layout.build_path(entities, validate=False)
plt.savefig(save_path)
plt.show()

caption = f"**Residual variance evolution separated by caffeine consumption 24h before.**"
save_caption(caption, save_layout, entities)
# Entities specific to this plot
entities = {'visualization': 'resvar', 'confound': 'caffeine24h'}
entities.update(entities_base)

# Create figure with subplots for variance and edgewise residuals
fig, axes = plt.subplots(1, 2, figsize=(16, 7))
min_res_var = 1000
max_res_var = 0
for i, (caf, group) in enumerate(residuals_df.groupby('caffeine_24h')):
    plt.sca(axes[i])
    ses_id = group['session'].apply(lambda x: str(x).zfill(3)).values
    res = group.drop(columns=['session', 'day_of_week', 'time_of_day','pe_dir','caffeine_2h', 'caffeine_24h']).values
    min_res_var, max_res_var = plot_res_variance(res, groupedby=caf, min_res_var=min_res_var, max_res_var=max_res_var, ses_id=ses_id)

#Set same y-axis limits for all subplots
for ax in axes.flat:
    ax.set_ylim(min_res_var-0.0005, max_res_var+0.0005)

# Set x-ticks with 90 degree rotation for the first subplot
axes[1].set_xticklabels(axes[1].get_xticklabels(), rotation=90)
axes[0].set_title("1 cup of coffee")
axes[1].set_title("2 cups of coffee")

fig.text(0.5, 0.05, 'Session Index', ha='center', fontsize=14)
fig.text(0.05, 0.5, 'Variance of Residuals', va='center', rotation='vertical', fontsize=14)
save_path = save_layout.build_path(entities, validate=False)
plt.savefig(save_path)
plt.show()

caption = f"**Residual variance evolution separated by caffeine consumption 24h before.**"
save_caption(caption, save_layout, entities)

In [ ]:

Copied!





# Entities specific to this plot
entities = {'visualization': 'edgewisevar', 'confound': 'caffeine24h'}
entities.update(entities_base)

# Create figure with subplots for variance and edgewise residuals
fig, axes = plt.subplots(1, 2, figsize=(16, 7))
min_res_var = 1000
max_res_var = 0
for i, (caf, group) in enumerate(residuals_df.groupby('caffeine_24h')):
    plt.sca(axes[i])
    ses_id = group['session'].apply(lambda x: str(x).zfill(3)).values
    res = group.drop(columns=['session', 'day_of_week', 'time_of_day','pe_dir','caffeine_2h', 'caffeine_24h']).values
    plot_edgewise_residual(res, groupedby=caf, ses_id=ses_id)

# Set x-ticks with 90 degree rotation for the first subplot
axes[1].set_xticklabels(axes[1].get_xticklabels(), rotation=90)
axes[0].set_title("1 cup of coffee")
axes[1].set_title("2 cups of coffee")

fig.text(0.5, 0.05, 'Session Index', ha='center', fontsize=14)
fig.text(0.05, 0.5, 'Variance of Residuals', va='center', rotation='vertical', fontsize=14)
save_path = save_layout.build_path(entities, validate=False)
plt.savefig(save_path)
plt.show()

caption = f"**Edgewise variance evolution separated by caffeine consumption 24h before.**"
save_caption(caption, save_layout, entities)
# Entities specific to this plot
entities = {'visualization': 'edgewisevar', 'confound': 'caffeine24h'}
entities.update(entities_base)

# Create figure with subplots for variance and edgewise residuals
fig, axes = plt.subplots(1, 2, figsize=(16, 7))
min_res_var = 1000
max_res_var = 0
for i, (caf, group) in enumerate(residuals_df.groupby('caffeine_24h')):
    plt.sca(axes[i])
    ses_id = group['session'].apply(lambda x: str(x).zfill(3)).values
    res = group.drop(columns=['session', 'day_of_week', 'time_of_day','pe_dir','caffeine_2h', 'caffeine_24h']).values
    plot_edgewise_residual(res, groupedby=caf, ses_id=ses_id)

# Set x-ticks with 90 degree rotation for the first subplot
axes[1].set_xticklabels(axes[1].get_xticklabels(), rotation=90)
axes[0].set_title("1 cup of coffee")
axes[1].set_title("2 cups of coffee")

fig.text(0.5, 0.05, 'Session Index', ha='center', fontsize=14)
fig.text(0.05, 0.5, 'Variance of Residuals', va='center', rotation='vertical', fontsize=14)
save_path = save_layout.build_path(entities, validate=False)
plt.savefig(save_path)
plt.show()

caption = f"**Edgewise variance evolution separated by caffeine consumption 24h before.**"
save_caption(caption, save_layout, entities)

Do some confound explain why ses-043 is seemingly less correlated to the other sessions? Can we find a reason that justifies its exclusion from the fMRI analysis?¶

In [92]:

Copied!





confounds_path = '/data/datasets/hcph-dataset/phenotype/mood_env_quest.tsv'
confounds_df = pd.read_csv(confounds_path, sep='\t')
# Keep only the confounds from the reliability sessions
confounds_df = confounds_df[confounds_df['session_number'].astype(str).str.zfill(3).str.startswith('0')]
confounds_df['session_number'] = confounds_df['session_number'].astype(str).str.zfill(3)
confounds_df.head()
confounds_path = '/data/datasets/hcph-dataset/phenotype/mood_env_quest.tsv'
confounds_df = pd.read_csv(confounds_path, sep='\t')
# Keep only the confounds from the reliability sessions
confounds_df = confounds_df[confounds_df['session_number'].astype(str).str.zfill(3).str.startswith('0')]
confounds_df['session_number'] = confounds_df['session_number'].astype(str).str.zfill(3)
confounds_df.head()

Out[92]:

	participant_id	session_number	scanner	pe_dir	date	time	weather_temperature	weather_wind	weather_precipitation	weather_precipitation_type	...	slept_qct	slept_rest	slept_bht	rumination	anxiety	physical_pain	time_slept_t1w	time_slept_rest	time_slept_diffusion	details_rumination_anxiety_pain
37	1	048	Prisma	PA	11-14-2023	20:45	11-14	10.4	43.0	4 Rain or drizzle	...	No	No	No	3.0	6.0	6.0	NaN	NaN	NaN	NaN
38	1	047	Prisma	RL	11-14-2023	18:50	11-14	10.4	43.0	4 Rain or drizzle	...	No	No	No	3.0	6.0	5.0	NaN	NaN	NaN	NaN
39	1	046	Prisma	LR	11-13-2023	20:15	10-16	8.7	7.0	2 Scattered precipitation (liquid)	...	No	No	No	2.0	6.0	6.0	NaN	NaN	NaN	NaN
40	1	045	Prisma	LR	11-13-2023	18:30	10-16	8.7	7.0	2 Scattered precipitation (liquid)	...	No	No	No	3.0	6.0	6.0	NaN	NaN	NaN	NaN
41	1	044	Prisma	LR	11-12-2023	11:10	5-10	7.0	0.1	2 Scattered precipitation (liquid)	...	No	No	No	3.0	6.0	6.0	NaN	NaN	NaN	NaN

5 rows × 78 columns

In [93]:

Copied!

confound_to_plot = confounds_df.columns.drop(['participant_id','session_number', 'scanner', 'pe_dir', 'remove_jewelry', 'mr_room_helium', 'details_rumination_anxiety_pain', 'time_slept_t1w', 'time_slept_rest', 'time_slept_diffusion'])
confound_to_plot
confound_to_plot = confounds_df.columns.drop(['participant_id','session_number', 'scanner', 'pe_dir', 'remove_jewelry', 'mr_room_helium', 'details_rumination_anxiety_pain', 'time_slept_t1w', 'time_slept_rest', 'time_slept_diffusion'])
confound_to_plot

Out[93]:

Index(['date', 'time', 'weather_temperature', 'weather_wind',
       'weather_precipitation', 'weather_precipitation_type',
       'weather_pressure', 'weather_humidity', 'weather_daylight',
       'blood_pressure', 'caffeine_24h', 'caffeine_2h', 'alcohol', 'triptans',
       'nsaids', 'weight', 'general_health', 'general_stress',
       'ease_concentration', 'beh_work', 'beh_freetime', 'beh_sport',
       'beh_outdoors', 'beh_electronic_devices', 'beh_active_social',
       'beh_passive_social', 'sleep_quality', 'remember_dreams', 'day_dreams',
       'time_to_bed', 'time_in_bed', 'steps', 'distance_walked',
       'stories_climbed', 'calories', 'interested', 'distressed', 'excited',
       'upset', 'strong', 'guilty', 'scared', 'hostile', 'enthusiastic',
       'proud', 'irritable', 'alert', 'ashamed', 'inspired', 'nervous',
       'determined', 'attentive', 'jittery', 'active', 'afraid',
       'filled_all_entries', 'mr_room_temperature', 'mr_room_humidity',
       'mr_room_pressure', 'slept_session', 'slept_t1w', 'slept_diffusion',
       'slept_qct', 'slept_rest', 'slept_bht', 'rumination', 'anxiety',
       'physical_pain'],
      dtype='object')

In [96]:

Copied!





# Convert 'time' column to datetime, round up to the closest full hour
confounds_df['time'] = pd.to_datetime(confounds_df['time'], format='%H:%M', errors='coerce').dt.round('h').dt.strftime('%H').astype(float)
slept_cols = [col for col in confounds_df.columns if col.startswith('slept')]
confounds_df[slept_cols] = confounds_df[slept_cols].replace({'Yes': 1, 'No': 0})
# Convert 'time' column to datetime, round up to the closest full hour
confounds_df['time'] = pd.to_datetime(confounds_df['time'], format='%H:%M', errors='coerce').dt.round('h').dt.strftime('%H').astype(float)
slept_cols = [col for col in confounds_df.columns if col.startswith('slept')]
confounds_df[slept_cols] = confounds_df[slept_cols].replace({'Yes': 1, 'No': 0})

Visualize the distribution of all confounds and highlight as a red dot the value corresponding to ses-043

In [ ]:

Copied!





# Entities specific to this plot
entities = {'visualization': 'confounddist'}
entities.update(entities_base)

# Filter sessions with '043' in session_number (as string, zero-padded if needed)
highlight_mask = confounds_df['session_number'].astype(str).str.zfill(3).str.contains('043')

n_cols = 5
n_plots = len(confound_to_plot)
n_rows = int(np.ceil(n_plots / n_cols))

fig, axes = plt.subplots(n_rows, n_cols, figsize=(n_cols * 4, n_rows * 3+10))
axes = axes.flatten()

for idx, confound in enumerate(confound_to_plot):
    ax = axes[idx]
    y = confounds_df[confound]
    x = np.arange(len(y))
    ax.scatter(x, y, alpha=0.7)
    # Highlight session 043
    ax.scatter(x[highlight_mask], y[highlight_mask], color='red', label='Session 043')
    ax.set_title(confound)
    ax.grid(True)

# Hide unused subplots
for j in range(idx + 1, len(axes)):
    axes[j].set_visible(False)

#plt.tight_layout()
plt.show()

caption = f"**Confounds distribution.** To try to understand whether some confound can explain that Session 043 is seemingly less correlated" \  
"to the other sessions, we plotted the distribution of all confounds and highlighted as a red dot the value corresponding to Session 043." \
"Not that it justifies exclusion of ses-043, but it is interesting to see that ses-043 is an outlier in different confound dimensions." \
"Notably, it was one of the only two sessions where the participant reported to be socially active for extended duration, to be outdoor," \
"away from electronic devices, to have more freetime and to have walked more distance and more number of steps than usual." \
"This session is also associated with the only time the sleep quality was recorded at the minimum."
save_caption(caption, save_layout, entities)
# Entities specific to this plot
entities = {'visualization': 'confounddist'}
entities.update(entities_base)

# Filter sessions with '043' in session_number (as string, zero-padded if needed)
highlight_mask = confounds_df['session_number'].astype(str).str.zfill(3).str.contains('043')

n_cols = 5
n_plots = len(confound_to_plot)
n_rows = int(np.ceil(n_plots / n_cols))

fig, axes = plt.subplots(n_rows, n_cols, figsize=(n_cols * 4, n_rows * 3+10))
axes = axes.flatten()

for idx, confound in enumerate(confound_to_plot):
    ax = axes[idx]
    y = confounds_df[confound]
    x = np.arange(len(y))
    ax.scatter(x, y, alpha=0.7)
    # Highlight session 043
    ax.scatter(x[highlight_mask], y[highlight_mask], color='red', label='Session 043')
    ax.set_title(confound)
    ax.grid(True)

# Hide unused subplots
for j in range(idx + 1, len(axes)):
    axes[j].set_visible(False)

#plt.tight_layout()
plt.show()

caption = f"**Confounds distribution.** To try to understand whether some confound can explain that Session 043 is seemingly less correlated" \  
"to the other sessions, we plotted the distribution of all confounds and highlighted as a red dot the value corresponding to Session 043." \
"Not that it justifies exclusion of ses-043, but it is interesting to see that ses-043 is an outlier in different confound dimensions." \
"Notably, it was one of the only two sessions where the participant reported to be socially active for extended duration, to be outdoor," \
"away from electronic devices, to have more freetime and to have walked more distance and more number of steps than usual." \
"This session is also associated with the only time the sleep quality was recorded at the minimum."
save_caption(caption, save_layout, entities)

Not that it justifies exclusion of ses-043, but it is interesting to see that ses-043 is an outlier in different confound dimensions. Notably, it was one of the only two sessions where the participant reported to be socially active for extended duration, to be outdoor, away from electronic devices, to have more freetime and to have walked more distance and more number of steps than usual. This session is also associated with the only time the sleep quality was recorded at the minimum.

Does replaying the same movie clip reduce the variability of resting-state fMRI?¶

Load the functional connectivity matrices¶

Run PCA to estimate the residual variance for each rsfMRI run¶

Choose number of principal components¶

Plot the principal components¶

Reconstruct the FC with a limited number of components¶

Compute the residual matrix¶

Plot residual matrix¶

Sanity check the reconstruction function¶

Plot the original FC, reconstructed FC and residual matrix for each session¶

Plot the variance of the residuals over sessions¶

Plot edgewise residual over sessions¶

Retrieve confounds¶

Retrieve scan time and day of week from the scans.tsv in the dataset repo¶

Test habituation significance¶

Plot residuals grouped by bins of confound factor¶

Day of week¶

Time of day¶

Phase Encoding Direction¶

Caffeine consumption 2h before¶

Caffeine consumption 24h before¶

Do some confound explain why ses-043 is seemingly less correlated to the other sessions? Can we find a reason that justifies its exclusion from the fMRI analysis?¶

Retrieve scan time and day of week from the `scans.tsv` in the dataset repo¶