Does replaying the same movie clip reduce the variability of resting-state fMRI?¶

The interactive version of this notebook is available at https://github.com/TheAxonLab/hcph-sops/tree/mkdocs/docs/analysis.

In [1]:

Copied!





import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
import nibabel as nib
from pathlib import Path
from scipy import stats
from statsmodels.stats.multitest import fdrcorrection
from confounds import get_confounds
from load_save import save_caption, load_matrices, init_save_layout

#Define filtering parameters
metric = "correlation"
atlas_dimension = 128
fd_threshold = 0.5
fdthresh_str = str(fd_threshold).replace(".", "")

# Define paths
code_path = Path("/data/code/hcph-sops/")
fc_path = Path(f"/data/derivatives/hcph-fc/")
dataset_path = Path("/data/datasets/hcph-dataset")
suppl_path = Path("/data/derivatives/hcph-suppl/")
confound_path = Path("/data/datasets/hcph-dataset/phenotype/mood_env_quest.tsv")
iqms_path = Path("/data/derivatives/hcph-mriqc/group_bold.tsv")
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
import nibabel as nib
from pathlib import Path
from scipy import stats
from statsmodels.stats.multitest import fdrcorrection
from confounds import get_confounds
from load_save import save_caption, load_matrices, init_save_layout

#Define filtering parameters
metric = "correlation"
atlas_dimension = 128
fd_threshold = 0.5
fdthresh_str = str(fd_threshold).replace(".", "")

# Define paths
code_path = Path("/data/code/hcph-sops/")
fc_path = Path(f"/data/derivatives/hcph-fc/")
dataset_path = Path("/data/datasets/hcph-dataset")
suppl_path = Path("/data/derivatives/hcph-suppl/")
confound_path = Path("/data/datasets/hcph-dataset/phenotype/mood_env_quest.tsv")
iqms_path = Path("/data/derivatives/hcph-mriqc/group_bold.tsv")

Load the functional connectivity matrices¶

In [2]:

Copied!





# Set up entities_base for the matrix loading
entities_base = {
    'subject': '001',
    'task': 'rsmovie',
    'measure': metric,
    'scale': str(atlas_dimension),
    'fd_threshold': fdthresh_str,
}

# Load the FC matrices and associated information using the helper function
matrices_dict = load_matrices(fc_path, entities_base, code_path)
fc_concat = matrices_dict["conn_concat"]
fc_matrix = matrices_dict["conn_matrix"]
files = matrices_dict["files"]
atlas_labels = matrices_dict["atlas_labels"]
region_labels = matrices_dict["region_labels"]
fc_size = matrices_dict["conn_size"]
ses_index = matrices_dict["ses_index"]

# Intialize the BIDSLayout to save the figures and their associated captions      
save_layout = init_save_layout(code_path, suppl_path)

# Plot an example FC matrix (the last one loaded)
fc_matrix = np.zeros((fc_size, fc_size))
# reconstruct from upper triangle if needed (optional, for visualization)
fc_matrix[np.triu_indices(fc_size, k=0)] = fc_concat[-1].flatten()
fc_matrix += np.triu(fc_matrix, k=1).T
plt.figure(figsize=(8,6))
sns.heatmap(fc_matrix, cmap='viridis')
plt.title(f"Example FC matrix")
plt.xlabel('Brain Regions')
plt.ylabel('Brain Regions')

print(f"Number of brain regions (matrix size): {fc_size}")
print(f"Shape of concatenated upper triangle FC matrices (number of sessions, number of region pairs): {fc_concat.shape}")

# Test whether the shape is as expected
assert fc_concat.shape == (36, fc_size * (fc_size + 1) // 2)
# Set up entities_base for the matrix loading
entities_base = {
    'subject': '001',
    'task': 'rsmovie',
    'measure': metric,
    'scale': str(atlas_dimension),
    'fd_threshold': fdthresh_str,
}

# Load the FC matrices and associated information using the helper function
matrices_dict = load_matrices(fc_path, entities_base, code_path)
fc_concat = matrices_dict["conn_concat"]
fc_matrix = matrices_dict["conn_matrix"]
files = matrices_dict["files"]
atlas_labels = matrices_dict["atlas_labels"]
region_labels = matrices_dict["region_labels"]
fc_size = matrices_dict["conn_size"]
ses_index = matrices_dict["ses_index"]

# Intialize the BIDSLayout to save the figures and their associated captions      
save_layout = init_save_layout(code_path, suppl_path)

# Plot an example FC matrix (the last one loaded)
fc_matrix = np.zeros((fc_size, fc_size))
# reconstruct from upper triangle if needed (optional, for visualization)
fc_matrix[np.triu_indices(fc_size, k=0)] = fc_concat[-1].flatten()
fc_matrix += np.triu(fc_matrix, k=1).T
plt.figure(figsize=(8,6))
sns.heatmap(fc_matrix, cmap='viridis')
plt.title(f"Example FC matrix")
plt.xlabel('Brain Regions')
plt.ylabel('Brain Regions')

print(f"Number of brain regions (matrix size): {fc_size}")
print(f"Shape of concatenated upper triangle FC matrices (number of sessions, number of region pairs): {fc_concat.shape}")

# Test whether the shape is as expected
assert fc_concat.shape == (36, fc_size * (fc_size + 1) // 2)

[get_dataset_dir] Dataset found in /home/cprovins/nilearn_data/difumo_atlases
Configuration 'hcph' already exists, skipping add_config_paths.
Number of brain regions (matrix size): 119
Shape of concatenated upper triangle FC matrices (number of sessions, number of region pairs): (36, 7140)

No description has been provided for this image

One remark to note is the number of brain regions does not correspond to the atlas_dimension, because we removed from the DiFuMo atlas the components that are specific to the CSF, ventricles and sinuses.

In [3]:

Copied!





# Add the information that apply too all figures generated below to the base entities
entities_base["experiment"] = "fmrivar"
entities_base["extension"] = ".svg"
entities_base["datatype"] = "figures"
entities_base["suffix"] = "bold"
# Add the information that apply too all figures generated below to the base entities
entities_base["experiment"] = "fmrivar"
entities_base["extension"] = ".svg"
entities_base["datatype"] = "figures"
entities_base["suffix"] = "bold"

In [4]:

Copied!





# Entities specific to this plot
entities = {'visualization': 'similmat'}
entities.update(entities_base)

similarity_matrix = np.corrcoef(fc_concat.T, rowvar=False)
print(f"Shape of the similarity matrix (number of sessions x number of sessions): {similarity_matrix.shape}")

mean_corr = np.mean(similarity_matrix)
print(f"Overall mean correlation: {mean_corr}")

plt.figure(figsize=(10, 8))
sns.heatmap(similarity_matrix, cmap='viridis')
plt.xticks(ticks=np.arange(len(ses_index))+0.5, labels=ses_index, rotation=90)
plt.yticks(ticks=np.arange(len(ses_index))+0.5, labels=ses_index)
plt.xlabel('Session Index', fontsize=14)
plt.ylabel('Session Index', fontsize=14)
save_path = save_layout.build_path(entities, validate=False)
plt.savefig(save_path)
plt.show()

caption = f"**Similarity matrix presenting the Pearson correlation between FC matrices for each session.**The overall mean correlation is {mean_corr:.2f}." \
"The fact that the FC matrices are highly correlated probably explains why only two components are enough to explain the vast majority of the variance." \
"It is also visible that session index 30 is less correlated with the other sessions compared to the other pair-wise comparisons."
save_caption(caption, save_layout, entities)
# Entities specific to this plot
entities = {'visualization': 'similmat'}
entities.update(entities_base)

similarity_matrix = np.corrcoef(fc_concat.T, rowvar=False)
print(f"Shape of the similarity matrix (number of sessions x number of sessions): {similarity_matrix.shape}")

mean_corr = np.mean(similarity_matrix)
print(f"Overall mean correlation: {mean_corr}")

plt.figure(figsize=(10, 8))
sns.heatmap(similarity_matrix, cmap='viridis')
plt.xticks(ticks=np.arange(len(ses_index))+0.5, labels=ses_index, rotation=90)
plt.yticks(ticks=np.arange(len(ses_index))+0.5, labels=ses_index)
plt.xlabel('Session Index', fontsize=14)
plt.ylabel('Session Index', fontsize=14)
save_path = save_layout.build_path(entities, validate=False)
plt.savefig(save_path)
plt.show()

caption = f"**Similarity matrix presenting the Pearson correlation between FC matrices for each session.**The overall mean correlation is {mean_corr:.2f}." \
"The fact that the FC matrices are highly correlated probably explains why only two components are enough to explain the vast majority of the variance." \
"It is also visible that session index 30 is less correlated with the other sessions compared to the other pair-wise comparisons."
save_caption(caption, save_layout, entities)

Shape of the similarity matrix (number of sessions x number of sessions): (36, 36)
Overall mean correlation: 0.8581150373039423

Run PCA to estimate the residual variance for each rsfMRI run¶

Choose number of principal components¶

We are using the elbow plot to decide the number of principal components to keep.

In [5]:

Copied!





from sklearn.decomposition import PCA
# Entities specific to this plot
entities = {'visualization': 'explainedvar'}
entities.update(entities_base)

# Run PCA
# Center the data by removing the mean of each connection
fc_concat = fc_concat - np.mean(fc_concat, axis=0)

# Standardize the data to have unit variance
#fc_concat = fc_concat / np.std(fc_concat, axis=0)

# Run PCA
pca = PCA()
pca.fit(fc_concat)

# Extract the explained variance
explained_variance = pca.explained_variance_ratio_

# Generate the elbow plot
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(range(1, len(explained_variance) + 1), explained_variance, marker='o', linestyle='-')
ax.set_xlabel('Number of Components', fontsize=14)
ax.set_ylabel('Explained Variance Ratio', fontsize=14)
ax.tick_params(axis='x', labelsize=12)
ax.tick_params(axis='y', labelsize=12)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

#Define the number of components to keep
n_comp = 6

# Highlight the points corresponding to number of components we keep in red
ax.plot(n_comp, explained_variance[n_comp-1], marker='o', markersize=8, color='red')
save_path = save_layout.build_path(entities, validate=False)
plt.savefig(save_path)
plt.show()

caption = "**Elbow plot to choose the number of principal components.**The point in red indicates the number of components selected."
save_caption(caption, save_layout, entities)
from sklearn.decomposition import PCA
# Entities specific to this plot
entities = {'visualization': 'explainedvar'}
entities.update(entities_base)

# Run PCA
# Center the data by removing the mean of each connection
fc_concat = fc_concat - np.mean(fc_concat, axis=0)

# Standardize the data to have unit variance
#fc_concat = fc_concat / np.std(fc_concat, axis=0)

# Run PCA
pca = PCA()
pca.fit(fc_concat)

# Extract the explained variance
explained_variance = pca.explained_variance_ratio_

# Generate the elbow plot
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(range(1, len(explained_variance) + 1), explained_variance, marker='o', linestyle='-')
ax.set_xlabel('Number of Components', fontsize=14)
ax.set_ylabel('Explained Variance Ratio', fontsize=14)
ax.tick_params(axis='x', labelsize=12)
ax.tick_params(axis='y', labelsize=12)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

#Define the number of components to keep
n_comp = 6

# Highlight the points corresponding to number of components we keep in red
ax.plot(n_comp, explained_variance[n_comp-1], marker='o', markersize=8, color='red')
save_path = save_layout.build_path(entities, validate=False)
plt.savefig(save_path)
plt.show()

caption = "**Elbow plot to choose the number of principal components.**The point in red indicates the number of components selected."
save_caption(caption, save_layout, entities)

Plot the principal components¶

In [6]:

Copied!





# Function to reconstruct the full matrix from the upper triangle
def reconstruct_full_matrix(upper_triangle, size):
    full_matrix = np.zeros((size, size))
    upper_indices = np.triu_indices(size)
    full_matrix[upper_indices] = upper_triangle
    full_matrix = full_matrix + full_matrix.T - np.diag(np.diag(full_matrix))
    return full_matrix

# Reorder regions by network
network_order = atlas_labels['yeo_networks7'].argsort()
reordered_regions_labels = atlas_labels.iloc[network_order]['difumo_names']
for i, pc in enumerate(pca.components_[:n_comp, :]):
    # Reconstruct the full matrix from the upper triangle
    full_matrix = reconstruct_full_matrix(pc, fc_size)
    # Cluster regions by network
    full_matrix = full_matrix[network_order, :][:, network_order]
    
    # Plot the principal component as a heatmap
    plt.figure(figsize=(20, 18))
    sns.heatmap(full_matrix, cmap='viridis')
    plt.title(f'Principal Component {i+1}')
    plt.xlabel('Brain Regions')
    plt.ylabel('Brain Regions')
    # Adjust y-ticks to move them further away from the heatmap
    plt.yticks(ticks=np.arange(len(reordered_regions_labels)) + 0.5, labels=reordered_regions_labels, va='center', rotation=0)
    plt.gca().tick_params(axis='y', pad=15)  # Move y-ticks further away
    plt.gca().tick_params(axis='x', pad=15)  # Move x-ticks further away

    # Add a color bar on the left and bottom ledge indicating the network
    network_colors = sns.color_palette("tab20", n_colors=7)  # Define 7 colors for the networks
    network_segments = atlas_labels.iloc[network_order]['yeo_networks7'].astype('category').cat.codes

    # Add color bar on the left with increased thickness
    for idx, color in enumerate(network_segments):
        plt.gca().add_patch(plt.Rectangle((-1.5, idx), 1.5, 1, color=network_colors[color], transform=plt.gca().transData, clip_on=False))

    # Add color bar on the bottom with increased thickness
    for idx, color in enumerate(network_segments):
        plt.gca().add_patch(plt.Rectangle((idx, full_matrix.shape[0]), 1, 1.5, color=network_colors[color], transform=plt.gca().transData, clip_on=False))

    # Add legend for the networks
    network_labels = atlas_labels['yeo_networks7'].astype('category').cat.categories
    legend_patches = [plt.Line2D([0], [0], color=network_colors[i], lw=4, label=label) for i, label in enumerate(network_labels)]
    plt.legend(handles=legend_patches, loc='upper right', bbox_to_anchor=(1.37, 1), title="Networks", fontsize=12, title_fontsize=14)

# Plot all principal components as heatmaps in a single figure with subplots
n_rows = int(np.ceil(n_comp / 3))
n_cols = 3
fig, axes = plt.subplots(n_rows, n_cols, figsize=(n_cols * 5, n_rows * 4))

for i, pc in enumerate(pca.components_[:n_comp, :]):
    full_matrix = reconstruct_full_matrix(pc, fc_size)
    # Cluster regions by network
    full_matrix = full_matrix[network_order, :][:, network_order]
    ax = axes[i // n_cols, i % n_cols]
    sns.heatmap(full_matrix, cmap='viridis', ax=ax, cbar_kws={'shrink': 0.5})
    ax.set_title(f'Principal Component {i+1}')
    ax.set_xlabel('Brain Regions', labelpad=20)
    ax.set_ylabel('Brain Regions', labelpad=20)
    ax.set_xticks([])  # Hide x-ticks
    ax.set_yticks([])  # Hide y-ticks

    # Add color bar on the left with increased thickness
    for idx, color in enumerate(network_segments):
        ax.add_patch(plt.Rectangle((-3, idx), 3, 1, color=network_colors[color], transform=ax.transData, clip_on=False))

    # Add color bar on the bottom with increased thickness
    for idx, color in enumerate(network_segments):
        ax.add_patch(plt.Rectangle((idx, full_matrix.shape[0]), 1, 3, color=network_colors[color], transform=ax.transData, clip_on=False))

# Add a single legend for the networks on the right of the figure
fig.legend(
    handles=legend_patches,
    loc='center right',
    bbox_to_anchor=(1.1, 0.5),  # Position the legend outside the figure, vertically centered
    title="Networks",
    fontsize=10,
    title_fontsize=12
)

# Hide any unused subplots
for j in range(i + 1, n_rows * n_cols):
    axes[j // n_cols, j % n_cols].axis('off')

plt.tight_layout()
plt.show()
# Function to reconstruct the full matrix from the upper triangle
def reconstruct_full_matrix(upper_triangle, size):
    full_matrix = np.zeros((size, size))
    upper_indices = np.triu_indices(size)
    full_matrix[upper_indices] = upper_triangle
    full_matrix = full_matrix + full_matrix.T - np.diag(np.diag(full_matrix))
    return full_matrix

# Reorder regions by network
network_order = atlas_labels['yeo_networks7'].argsort()
reordered_regions_labels = atlas_labels.iloc[network_order]['difumo_names']
for i, pc in enumerate(pca.components_[:n_comp, :]):
    # Reconstruct the full matrix from the upper triangle
    full_matrix = reconstruct_full_matrix(pc, fc_size)
    # Cluster regions by network
    full_matrix = full_matrix[network_order, :][:, network_order]
    
    # Plot the principal component as a heatmap
    plt.figure(figsize=(20, 18))
    sns.heatmap(full_matrix, cmap='viridis')
    plt.title(f'Principal Component {i+1}')
    plt.xlabel('Brain Regions')
    plt.ylabel('Brain Regions')
    # Adjust y-ticks to move them further away from the heatmap
    plt.yticks(ticks=np.arange(len(reordered_regions_labels)) + 0.5, labels=reordered_regions_labels, va='center', rotation=0)
    plt.gca().tick_params(axis='y', pad=15)  # Move y-ticks further away
    plt.gca().tick_params(axis='x', pad=15)  # Move x-ticks further away

    # Add a color bar on the left and bottom ledge indicating the network
    network_colors = sns.color_palette("tab20", n_colors=7)  # Define 7 colors for the networks
    network_segments = atlas_labels.iloc[network_order]['yeo_networks7'].astype('category').cat.codes

    # Add color bar on the left with increased thickness
    for idx, color in enumerate(network_segments):
        plt.gca().add_patch(plt.Rectangle((-1.5, idx), 1.5, 1, color=network_colors[color], transform=plt.gca().transData, clip_on=False))

    # Add color bar on the bottom with increased thickness
    for idx, color in enumerate(network_segments):
        plt.gca().add_patch(plt.Rectangle((idx, full_matrix.shape[0]), 1, 1.5, color=network_colors[color], transform=plt.gca().transData, clip_on=False))

    # Add legend for the networks
    network_labels = atlas_labels['yeo_networks7'].astype('category').cat.categories
    legend_patches = [plt.Line2D([0], [0], color=network_colors[i], lw=4, label=label) for i, label in enumerate(network_labels)]
    plt.legend(handles=legend_patches, loc='upper right', bbox_to_anchor=(1.37, 1), title="Networks", fontsize=12, title_fontsize=14)

# Plot all principal components as heatmaps in a single figure with subplots
n_rows = int(np.ceil(n_comp / 3))
n_cols = 3
fig, axes = plt.subplots(n_rows, n_cols, figsize=(n_cols * 5, n_rows * 4))

for i, pc in enumerate(pca.components_[:n_comp, :]):
    full_matrix = reconstruct_full_matrix(pc, fc_size)
    # Cluster regions by network
    full_matrix = full_matrix[network_order, :][:, network_order]
    ax = axes[i // n_cols, i % n_cols]
    sns.heatmap(full_matrix, cmap='viridis', ax=ax, cbar_kws={'shrink': 0.5})
    ax.set_title(f'Principal Component {i+1}')
    ax.set_xlabel('Brain Regions', labelpad=20)
    ax.set_ylabel('Brain Regions', labelpad=20)
    ax.set_xticks([])  # Hide x-ticks
    ax.set_yticks([])  # Hide y-ticks

    # Add color bar on the left with increased thickness
    for idx, color in enumerate(network_segments):
        ax.add_patch(plt.Rectangle((-3, idx), 3, 1, color=network_colors[color], transform=ax.transData, clip_on=False))

    # Add color bar on the bottom with increased thickness
    for idx, color in enumerate(network_segments):
        ax.add_patch(plt.Rectangle((idx, full_matrix.shape[0]), 1, 3, color=network_colors[color], transform=ax.transData, clip_on=False))

# Add a single legend for the networks on the right of the figure
fig.legend(
    handles=legend_patches,
    loc='center right',
    bbox_to_anchor=(1.1, 0.5),  # Position the legend outside the figure, vertically centered
    title="Networks",
    fontsize=10,
    title_fontsize=12
)

# Hide any unused subplots
for j in range(i + 1, n_rows * n_cols):
    axes[j // n_cols, j % n_cols].axis('off')

plt.tight_layout()
plt.show()

In [7]:

Copied!





# Functions to compute mean correlation for both region(s) and network(s) with other regions/networks in a principal component
def mean_region_or_network_correlation(
    query, pc_index, atlas_labels, pca, fc_size, reconstruct_full_matrix_func, 
    query_type="region", with_query=None
):
    """
    Compute the mean correlation of region(s) or network(s) with other regions or networks 
    in a given principal component (PC).

    Args:
        query (str): String to search for in atlas_labels to identify region(s) or network(s) (case-insensitive).
        pc_index (int): Which principal component to use (0-based index).
        atlas_labels (pd.DataFrame): DataFrame with atlas meta-data, must have 'difumo_names' and 'yeo_networks7', etc.
        pca (sklearn.decomposition.PCA): Fitted PCA object.
        fc_size (int): Dimensionality of the FC matrix.
        reconstruct_full_matrix_func (function): Function to reshape flattened PC vector into full symmetric matrix.
        query_type (str): Either "region" (default) or "network".
        with_query (str or None): If provided, compute mean correlation with regions/networks matching this string, instead of with all others.


    Returns:
        float or None: The mean correlation. None if no query match.
    """

    # Select which column will be used for query
    if query_type == "region":
        query_col = 'difumo_names'
    elif query_type == "network":
        # You may want to use another label set for networks (e.g., 'yeo_networks7')
        # Accept both numeric and string matches
        query_col = 'yeo_networks7'
    else:
        raise ValueError(f"query_type must be 'region' or 'network', got {query_type}")

    # Prepare list of indices for main query
    mask_main = atlas_labels[query_col].astype(str).str.contains(query, case=False, na=False)
    indices_main = np.where(mask_main)[0]
    if len(indices_main) == 0:
        print(f"No {query_type}(s) matching '{query}' found in atlas_labels!")
        return None

    # Prepare list of indices for other set (if with_query is given), else use all except main
    if with_query is not None:
        mask_with = atlas_labels[query_col].astype(str).str.contains(with_query, case=False, na=False)
        indices_with = np.where(mask_with)[0]
        if len(indices_with) == 0:
            print(f"No {query_type}(s) matching 'with_query'='{with_query}' found in atlas_labels!")
            return None
        # Remove self-overlap
        indices_with = np.setdiff1d(indices_with, indices_main)
    else:
        # Calculate correlation with all *other* regions/networks
        indices_with = np.setdiff1d(np.arange(atlas_labels.shape[0]), indices_main)

    # Get specified PC weights and reconstruct full matrix
    pc_vec = pca.components_[pc_index, :]
    pc_matrix = reconstruct_full_matrix_func(pc_vec, fc_size)

    mean_corrs = []
    for i in indices_main:
        # Get correlations to all indices_with
        corrs = pc_matrix[i, indices_with]
        # If main and with_query overlap is possible, remove self (shouldn't be for networks)
        corrs = corrs.flatten()  # In case it's size 1
        mean_corrs.append(np.nanmean(corrs))
    mean_correlation = np.mean(mean_corrs)
    msg_type = f"{query_type}(s)"
    other_desc = f"with {query_type}(s) matching '{with_query}'" if with_query is not None else "with all other regions/networks"
    print(f"Mean correlation of {msg_type} matching '{query}' {other_desc} in PC{pc_index+1}: {mean_correlation:.4f}")
    return mean_correlation

# One region with all other regions
mean_region_or_network_correlation(
    query='retrosplenial',
    pc_index=0,
    atlas_labels=atlas_labels,
    pca=pca,
    fc_size=fc_size,
    reconstruct_full_matrix_func=reconstruct_full_matrix,
    query_type="region"
)

mean_region_or_network_correlation(
    query='retrosplenial',
    pc_index=1,
    atlas_labels=atlas_labels,
    pca=pca,
    fc_size=fc_size,
    reconstruct_full_matrix_func=reconstruct_full_matrix,
    query_type="region"
)

mean_region_or_network_correlation(
    query='internal capsule',
    pc_index=1,
    atlas_labels=atlas_labels,
    pca=pca,
    fc_size=fc_size,
    reconstruct_full_matrix_func=reconstruct_full_matrix,
    query_type="region"
)

mean_region_or_network_correlation(
    query='cerebellum crus I posterior',
    pc_index=1,
    atlas_labels=atlas_labels,
    pca=pca,
    fc_size=fc_size,
    reconstruct_full_matrix_func=reconstruct_full_matrix,
    query_type="region"
)

# Networks with networks

# PC3: DMN vs ContA, DorsAttnB, SalVentAttnA, visual networks
print("\n--- PC3: DMN vs ContA, DorsAttnB, SalVentAttnA, visual ---")
dmn_vs_ContA_pc3 = mean_region_or_network_correlation(
    query='defaultB',
    with_query='ContA',
    pc_index=2,
    atlas_labels=atlas_labels,
    pca=pca,
    fc_size=fc_size,
    reconstruct_full_matrix_func=reconstruct_full_matrix,
    query_type="network"
)
dmn_vs_dorsal_pc3 = mean_region_or_network_correlation(
    query='defaultB',
    with_query='DorsAttnB',
    pc_index=2,
    atlas_labels=atlas_labels,
    pca=pca,
    fc_size=fc_size,
    reconstruct_full_matrix_func=reconstruct_full_matrix,
    query_type="network"
)
dmn_vs_SalVentAttnA_pc3 = mean_region_or_network_correlation(
    query='defaultB',
    with_query='SalVentAttnA',
    pc_index=2,
    atlas_labels=atlas_labels,
    pca=pca,
    fc_size=fc_size,
    reconstruct_full_matrix_func=reconstruct_full_matrix,
    query_type="network"
)
dmn_vs_visual_pc3 = mean_region_or_network_correlation(
    query='defaultB',
    with_query='VisCent',
    pc_index=2,
    atlas_labels=atlas_labels,
    pca=pca,
    fc_size=fc_size,
    reconstruct_full_matrix_func=reconstruct_full_matrix,
    query_type="network"
)

# PC4: DMN vs SomMotA/SalVentAttnA
print("\n--- PC4: DMN vs SomMotA, DMN vs SalVentAttnA ---")
dmn_vs_SomMotA_pc4 = mean_region_or_network_correlation(
    query='defaultB',
    with_query='SomMotA',
    pc_index=3,
    atlas_labels=atlas_labels,
    pca=pca,
    fc_size=fc_size,
    reconstruct_full_matrix_func=reconstruct_full_matrix,
    query_type="network"
)
dmn_vs_SalVentAttnA_pc4 = mean_region_or_network_correlation(
    query='defaultB',
    with_query='SalVentAttnA',
    pc_index=3,
    atlas_labels=atlas_labels,
    pca=pca,
    fc_size=fc_size,
    reconstruct_full_matrix_func=reconstruct_full_matrix,
    query_type="network"
)

# PC5: DorsAttnB vs DMN, DorsAttnB vs ContA, ContA vs DMN, ContA vs SomMotA
print("\n--- PC5: DorsAttnB vs DMN/ContA, ContA vs DMN/SomMotA ---")
dorsal_vs_dmn_pc5 = mean_region_or_network_correlation(
    query='DorsAttnB',
    with_query='defaultB',
    pc_index=4,
    atlas_labels=atlas_labels,
    pca=pca,
    fc_size=fc_size,
    reconstruct_full_matrix_func=reconstruct_full_matrix,
    query_type="network"
)
dorsal_vs_ContA_pc5 = mean_region_or_network_correlation(
    query='DorsAttnB',
    with_query='ContA',
    pc_index=4,
    atlas_labels=atlas_labels,
    pca=pca,
    fc_size=fc_size,
    reconstruct_full_matrix_func=reconstruct_full_matrix,
    query_type="network"
)
ContA_vs_dmn_pc5 = mean_region_or_network_correlation(
    query='ContA',
    with_query='defaultB',
    pc_index=4,
    atlas_labels=atlas_labels,
    pca=pca,
    fc_size=fc_size,
    reconstruct_full_matrix_func=reconstruct_full_matrix,
    query_type="network"
)
ContA_vs_SomMotA_pc5 = mean_region_or_network_correlation(
    query='ContA',
    with_query='SomMotA',
    pc_index=4,
    atlas_labels=atlas_labels,
    pca=pca,
    fc_size=fc_size,
    reconstruct_full_matrix_func=reconstruct_full_matrix,
    query_type="network"
)
# Functions to compute mean correlation for both region(s) and network(s) with other regions/networks in a principal component
def mean_region_or_network_correlation(
    query, pc_index, atlas_labels, pca, fc_size, reconstruct_full_matrix_func, 
    query_type="region", with_query=None
):
    """
    Compute the mean correlation of region(s) or network(s) with other regions or networks 
    in a given principal component (PC).

    Args:
        query (str): String to search for in atlas_labels to identify region(s) or network(s) (case-insensitive).
        pc_index (int): Which principal component to use (0-based index).
        atlas_labels (pd.DataFrame): DataFrame with atlas meta-data, must have 'difumo_names' and 'yeo_networks7', etc.
        pca (sklearn.decomposition.PCA): Fitted PCA object.
        fc_size (int): Dimensionality of the FC matrix.
        reconstruct_full_matrix_func (function): Function to reshape flattened PC vector into full symmetric matrix.
        query_type (str): Either "region" (default) or "network".
        with_query (str or None): If provided, compute mean correlation with regions/networks matching this string, instead of with all others.


    Returns:
        float or None: The mean correlation. None if no query match.
    """

    # Select which column will be used for query
    if query_type == "region":
        query_col = 'difumo_names'
    elif query_type == "network":
        # You may want to use another label set for networks (e.g., 'yeo_networks7')
        # Accept both numeric and string matches
        query_col = 'yeo_networks7'
    else:
        raise ValueError(f"query_type must be 'region' or 'network', got {query_type}")

    # Prepare list of indices for main query
    mask_main = atlas_labels[query_col].astype(str).str.contains(query, case=False, na=False)
    indices_main = np.where(mask_main)[0]
    if len(indices_main) == 0:
        print(f"No {query_type}(s) matching '{query}' found in atlas_labels!")
        return None

    # Prepare list of indices for other set (if with_query is given), else use all except main
    if with_query is not None:
        mask_with = atlas_labels[query_col].astype(str).str.contains(with_query, case=False, na=False)
        indices_with = np.where(mask_with)[0]
        if len(indices_with) == 0:
            print(f"No {query_type}(s) matching 'with_query'='{with_query}' found in atlas_labels!")
            return None
        # Remove self-overlap
        indices_with = np.setdiff1d(indices_with, indices_main)
    else:
        # Calculate correlation with all *other* regions/networks
        indices_with = np.setdiff1d(np.arange(atlas_labels.shape[0]), indices_main)

    # Get specified PC weights and reconstruct full matrix
    pc_vec = pca.components_[pc_index, :]
    pc_matrix = reconstruct_full_matrix_func(pc_vec, fc_size)

    mean_corrs = []
    for i in indices_main:
        # Get correlations to all indices_with
        corrs = pc_matrix[i, indices_with]
        # If main and with_query overlap is possible, remove self (shouldn't be for networks)
        corrs = corrs.flatten()  # In case it's size 1
        mean_corrs.append(np.nanmean(corrs))
    mean_correlation = np.mean(mean_corrs)
    msg_type = f"{query_type}(s)"
    other_desc = f"with {query_type}(s) matching '{with_query}'" if with_query is not None else "with all other regions/networks"
    print(f"Mean correlation of {msg_type} matching '{query}' {other_desc} in PC{pc_index+1}: {mean_correlation:.4f}")
    return mean_correlation

# One region with all other regions
mean_region_or_network_correlation(
    query='retrosplenial',
    pc_index=0,
    atlas_labels=atlas_labels,
    pca=pca,
    fc_size=fc_size,
    reconstruct_full_matrix_func=reconstruct_full_matrix,
    query_type="region"
)

mean_region_or_network_correlation(
    query='retrosplenial',
    pc_index=1,
    atlas_labels=atlas_labels,
    pca=pca,
    fc_size=fc_size,
    reconstruct_full_matrix_func=reconstruct_full_matrix,
    query_type="region"
)

mean_region_or_network_correlation(
    query='internal capsule',
    pc_index=1,
    atlas_labels=atlas_labels,
    pca=pca,
    fc_size=fc_size,
    reconstruct_full_matrix_func=reconstruct_full_matrix,
    query_type="region"
)

mean_region_or_network_correlation(
    query='cerebellum crus I posterior',
    pc_index=1,
    atlas_labels=atlas_labels,
    pca=pca,
    fc_size=fc_size,
    reconstruct_full_matrix_func=reconstruct_full_matrix,
    query_type="region"
)

# Networks with networks

# PC3: DMN vs ContA, DorsAttnB, SalVentAttnA, visual networks
print("\n--- PC3: DMN vs ContA, DorsAttnB, SalVentAttnA, visual ---")
dmn_vs_ContA_pc3 = mean_region_or_network_correlation(
    query='defaultB',
    with_query='ContA',
    pc_index=2,
    atlas_labels=atlas_labels,
    pca=pca,
    fc_size=fc_size,
    reconstruct_full_matrix_func=reconstruct_full_matrix,
    query_type="network"
)
dmn_vs_dorsal_pc3 = mean_region_or_network_correlation(
    query='defaultB',
    with_query='DorsAttnB',
    pc_index=2,
    atlas_labels=atlas_labels,
    pca=pca,
    fc_size=fc_size,
    reconstruct_full_matrix_func=reconstruct_full_matrix,
    query_type="network"
)
dmn_vs_SalVentAttnA_pc3 = mean_region_or_network_correlation(
    query='defaultB',
    with_query='SalVentAttnA',
    pc_index=2,
    atlas_labels=atlas_labels,
    pca=pca,
    fc_size=fc_size,
    reconstruct_full_matrix_func=reconstruct_full_matrix,
    query_type="network"
)
dmn_vs_visual_pc3 = mean_region_or_network_correlation(
    query='defaultB',
    with_query='VisCent',
    pc_index=2,
    atlas_labels=atlas_labels,
    pca=pca,
    fc_size=fc_size,
    reconstruct_full_matrix_func=reconstruct_full_matrix,
    query_type="network"
)

# PC4: DMN vs SomMotA/SalVentAttnA
print("\n--- PC4: DMN vs SomMotA, DMN vs SalVentAttnA ---")
dmn_vs_SomMotA_pc4 = mean_region_or_network_correlation(
    query='defaultB',
    with_query='SomMotA',
    pc_index=3,
    atlas_labels=atlas_labels,
    pca=pca,
    fc_size=fc_size,
    reconstruct_full_matrix_func=reconstruct_full_matrix,
    query_type="network"
)
dmn_vs_SalVentAttnA_pc4 = mean_region_or_network_correlation(
    query='defaultB',
    with_query='SalVentAttnA',
    pc_index=3,
    atlas_labels=atlas_labels,
    pca=pca,
    fc_size=fc_size,
    reconstruct_full_matrix_func=reconstruct_full_matrix,
    query_type="network"
)

# PC5: DorsAttnB vs DMN, DorsAttnB vs ContA, ContA vs DMN, ContA vs SomMotA
print("\n--- PC5: DorsAttnB vs DMN/ContA, ContA vs DMN/SomMotA ---")
dorsal_vs_dmn_pc5 = mean_region_or_network_correlation(
    query='DorsAttnB',
    with_query='defaultB',
    pc_index=4,
    atlas_labels=atlas_labels,
    pca=pca,
    fc_size=fc_size,
    reconstruct_full_matrix_func=reconstruct_full_matrix,
    query_type="network"
)
dorsal_vs_ContA_pc5 = mean_region_or_network_correlation(
    query='DorsAttnB',
    with_query='ContA',
    pc_index=4,
    atlas_labels=atlas_labels,
    pca=pca,
    fc_size=fc_size,
    reconstruct_full_matrix_func=reconstruct_full_matrix,
    query_type="network"
)
ContA_vs_dmn_pc5 = mean_region_or_network_correlation(
    query='ContA',
    with_query='defaultB',
    pc_index=4,
    atlas_labels=atlas_labels,
    pca=pca,
    fc_size=fc_size,
    reconstruct_full_matrix_func=reconstruct_full_matrix,
    query_type="network"
)
ContA_vs_SomMotA_pc5 = mean_region_or_network_correlation(
    query='ContA',
    with_query='SomMotA',
    pc_index=4,
    atlas_labels=atlas_labels,
    pca=pca,
    fc_size=fc_size,
    reconstruct_full_matrix_func=reconstruct_full_matrix,
    query_type="network"
)

Mean correlation of region(s) matching 'retrosplenial' with all other regions/networks in PC1: 0.0285
Mean correlation of region(s) matching 'retrosplenial' with all other regions/networks in PC2: 0.0263
Mean correlation of region(s) matching 'internal capsule' with all other regions/networks in PC2: 0.0290
Mean correlation of region(s) matching 'cerebellum crus I posterior' with all other regions/networks in PC2: 0.0250

--- PC3: DMN vs ContA, DorsAttnB, SalVentAttnA, visual ---
Mean correlation of network(s) matching 'defaultB' with network(s) matching 'ContA' in PC3: -0.0122
Mean correlation of network(s) matching 'defaultB' with network(s) matching 'DorsAttnB' in PC3: -0.0090
Mean correlation of network(s) matching 'defaultB' with network(s) matching 'SalVentAttnA' in PC3: -0.0043
Mean correlation of network(s) matching 'defaultB' with network(s) matching 'VisCent' in PC3: -0.0075

--- PC4: DMN vs SomMotA, DMN vs SalVentAttnA ---
Mean correlation of network(s) matching 'defaultB' with network(s) matching 'SomMotA' in PC4: -0.0118
Mean correlation of network(s) matching 'defaultB' with network(s) matching 'SalVentAttnA' in PC4: -0.0124

--- PC5: DorsAttnB vs DMN/ContA, ContA vs DMN/SomMotA ---
Mean correlation of network(s) matching 'DorsAttnB' with network(s) matching 'defaultB' in PC5: 0.0062
Mean correlation of network(s) matching 'DorsAttnB' with network(s) matching 'ContA' in PC5: 0.0069
Mean correlation of network(s) matching 'ContA' with network(s) matching 'defaultB' in PC5: -0.0075
Mean correlation of network(s) matching 'ContA' with network(s) matching 'SomMotA' in PC5: -0.0097

Reconstruct the FC with a limited number of components¶

After extracting the principal components, each FC matrix can be expressed as a linear combinations of the principal components plus a residual matrix.

In [8]:

Copied!





print("Shape of PCA components matrix (n_components, n_features):", pca.components_.shape)
# Project PC components back into each FC subject space
fc_projected = pca.transform(fc_concat)[:, :n_comp]
print("Shape of projected FC onto the first n_comp PCs (n_samples, n_comp):", fc_projected.shape)
fc_reconstructed = np.dot(fc_projected, pca.components_[:n_comp, :]) + np.mean(fc_concat, axis=0)
print("Shape of reconstructed FC matrix (n_samples, n_features):", fc_reconstructed.shape)
print("Shape of PCA components matrix (n_components, n_features):", pca.components_.shape)
# Project PC components back into each FC subject space
fc_projected = pca.transform(fc_concat)[:, :n_comp]
print("Shape of projected FC onto the first n_comp PCs (n_samples, n_comp):", fc_projected.shape)
fc_reconstructed = np.dot(fc_projected, pca.components_[:n_comp, :]) + np.mean(fc_concat, axis=0)
print("Shape of reconstructed FC matrix (n_samples, n_features):", fc_reconstructed.shape)

Shape of PCA components matrix (n_components, n_features): (36, 7140)
Shape of projected FC onto the first n_comp PCs (n_samples, n_comp): (36, 6)
Shape of reconstructed FC matrix (n_samples, n_features): (36, 7140)

Evolution of the PC loading across movie repetition¶

In [9]:

Copied!





# Create the figure and subplots
fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(15,10), sharex=True)
axes = axes.flatten()  # Flatten the axes array for easy indexing

# Ensure axes is iterable even if n_comp == 1
if n_comp == 1:
    axes = [axes]

# Collect p-values for multiple comparisons
slopes = []
pvals = []
regress_results = []

for i in range(n_comp):
    X = np.arange(fc_projected.shape[0])
    y = fc_projected[:, i]
    res = stats.linregress(X, y)
    slopes.append(res.slope)
    pvals.append(res.pvalue)
    regress_results.append(res)

# Correct p-values for multiple comparisons using FDR
_, pvals_fdr = fdrcorrection(pvals, alpha=0.05, method='indep')

for i in range(n_comp):
    ax = axes[i]
    ax.plot(range(fc_projected.shape[0]), fc_projected[:, i], marker='o', label=f'PC{i+1}')
    res = regress_results[i]
    # Use original slope, draw regression line, and annotate with corrected p-value
    ax.plot(
        X,
        res.intercept + res.slope * X,
        linestyle='--',
        label=f'Linear Fit\nβ={res.slope:.3f}, p={pvals_fdr[i]:.2g}'
    )
    ax.set_xticks(np.arange(len(ses_index)))
    ax.set_xticklabels(ses_index, rotation=90)
    ax.set_xlabel('Session Index', fontsize=12)
    ax.set_ylabel('PC Weight', fontsize=12)
    ax.set_title(f'PC{i+1}', fontsize=14)
    ax.legend()

plt.tight_layout()
plt.show()
# Create the figure and subplots
fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(15,10), sharex=True)
axes = axes.flatten()  # Flatten the axes array for easy indexing

# Ensure axes is iterable even if n_comp == 1
if n_comp == 1:
    axes = [axes]

# Collect p-values for multiple comparisons
slopes = []
pvals = []
regress_results = []

for i in range(n_comp):
    X = np.arange(fc_projected.shape[0])
    y = fc_projected[:, i]
    res = stats.linregress(X, y)
    slopes.append(res.slope)
    pvals.append(res.pvalue)
    regress_results.append(res)

# Correct p-values for multiple comparisons using FDR
_, pvals_fdr = fdrcorrection(pvals, alpha=0.05, method='indep')

for i in range(n_comp):
    ax = axes[i]
    ax.plot(range(fc_projected.shape[0]), fc_projected[:, i], marker='o', label=f'PC{i+1}')
    res = regress_results[i]
    # Use original slope, draw regression line, and annotate with corrected p-value
    ax.plot(
        X,
        res.intercept + res.slope * X,
        linestyle='--',
        label=f'Linear Fit\nβ={res.slope:.3f}, p={pvals_fdr[i]:.2g}'
    )
    ax.set_xticks(np.arange(len(ses_index)))
    ax.set_xticklabels(ses_index, rotation=90)
    ax.set_xlabel('Session Index', fontsize=12)
    ax.set_ylabel('PC Weight', fontsize=12)
    ax.set_title(f'PC{i+1}', fontsize=14)
    ax.legend()

plt.tight_layout()
plt.show()

Most strikingly, the brain pattern captured by PC1 becomes less prevalent across sessions, whereas those captured by PC4 become more prevalent.

Compute the residual matrix¶

In [10]:

Copied!

# Calculate the residuals as the difference between the full and reconstructed FC matrices
residuals = fc_concat - fc_reconstructed
print(f"Residuals matrix shape (n_sessions, n_features): {residuals.shape}")
# Calculate the residuals as the difference between the full and reconstructed FC matrices
residuals = fc_concat - fc_reconstructed
print(f"Residuals matrix shape (n_sessions, n_features): {residuals.shape}")

Residuals matrix shape (n_sessions, n_features): (36, 7140)

In [11]:

Copied!





import numpy as np
from sklearn.decomposition import PCA

# Project the data onto the residual components (from n_comp + 1 onwards)
residuals_frompc = pca.transform(fc_concat)[:, n_comp:]

# Reconstruct the data using the residual components
residuals_frompc = np.dot(residuals_frompc, pca.components_[n_comp:, :]) + np.mean(fc_concat, axis=0)

# Print the shape of the reconstructed data
print(f"Residuals from PC matrix shape (n_sessions, n_features): {residuals_frompc.shape}")
import numpy as np
from sklearn.decomposition import PCA

# Project the data onto the residual components (from n_comp + 1 onwards)
residuals_frompc = pca.transform(fc_concat)[:, n_comp:]

# Reconstruct the data using the residual components
residuals_frompc = np.dot(residuals_frompc, pca.components_[n_comp:, :]) + np.mean(fc_concat, axis=0)

# Print the shape of the reconstructed data
print(f"Residuals from PC matrix shape (n_sessions, n_features): {residuals_frompc.shape}")

Residuals from PC matrix shape (n_sessions, n_features): (36, 7140)

Plot residual matrix¶

Sanity check the reconstruction function¶

In [12]:

Copied!





# Test my reconstruction function
upper_triangle = fc_matrix[np.triu_indices_from(fc_matrix, k=0)]
full_matrix = reconstruct_full_matrix(upper_triangle, fc_size)
full_matrix.shape
assert np.array_equal(full_matrix, fc_matrix)
# Test my reconstruction function
upper_triangle = fc_matrix[np.triu_indices_from(fc_matrix, k=0)]
full_matrix = reconstruct_full_matrix(upper_triangle, fc_size)
full_matrix.shape
assert np.array_equal(full_matrix, fc_matrix)

In [13]:

Copied!





residual = reconstruct_full_matrix(residuals[0,:], fc_size)

# Plot the residual matrix as a heatmap
plt.figure(figsize=(10, 8))
sns.heatmap(residual, cmap='viridis')
plt.title('Residual Matrix Heatmap')
plt.show()
residual = reconstruct_full_matrix(residuals[0,:], fc_size)

# Plot the residual matrix as a heatmap
plt.figure(figsize=(10, 8))
sns.heatmap(residual, cmap='viridis')
plt.title('Residual Matrix Heatmap')
plt.show()

In [14]:

Copied!





# Check whether reconstructing the residuals a different way leads to the same result
residual_frompc = reconstruct_full_matrix(residuals_frompc[0,:], fc_size)

# Plot the residual matrix as a heatmap
plt.figure(figsize=(10, 8))
sns.heatmap(residual_frompc, cmap='viridis')
plt.title('Residual Matrix Heatmap')
plt.show()
# Check whether reconstructing the residuals a different way leads to the same result
residual_frompc = reconstruct_full_matrix(residuals_frompc[0,:], fc_size)

# Plot the residual matrix as a heatmap
plt.figure(figsize=(10, 8))
sns.heatmap(residual_frompc, cmap='viridis')
plt.title('Residual Matrix Heatmap')
plt.show()

Plot the original FC, reconstructed FC and residual matrix for each session¶

In [15]:

Copied!





# Iterate over each session
for s in range(fc_concat.shape[0]):
    # Reconstruct the full matrices for original FC, reconstructed FC, and residuals
    original_fc = reconstruct_full_matrix(fc_concat[s, :], fc_size)
    reconstructed_fc = reconstruct_full_matrix(fc_reconstructed[s, :], fc_size)
    residual_fc = reconstruct_full_matrix(residuals[s, :], fc_size)

    # Create a 1x3 subplot
    fig, axes = plt.subplots(1, 3, figsize=(18, 6))
    
    # Plot the original FC matrix
    sns.heatmap(original_fc, cmap='viridis', ax=axes[0])
    axes[0].set_title(f'Session {ses_index[s]}: Original FC', fontsize=14)
    
    # Plot the reconstructed FC matrix
    sns.heatmap(reconstructed_fc, cmap='viridis', ax=axes[1])
    axes[1].set_title(f'Session {ses_index[s]}: Reconstructed FC', fontsize=14)
    
    # Plot the residual matrix
    sns.heatmap(residual_fc, cmap='viridis', ax=axes[2])
    axes[2].set_title(f'Session {ses_index[s]}: Residual', fontsize=14)
    
    # Adjust layout and display the plot
    plt.tight_layout()
    plt.show()
# Iterate over each session
for s in range(fc_concat.shape[0]):
    # Reconstruct the full matrices for original FC, reconstructed FC, and residuals
    original_fc = reconstruct_full_matrix(fc_concat[s, :], fc_size)
    reconstructed_fc = reconstruct_full_matrix(fc_reconstructed[s, :], fc_size)
    residual_fc = reconstruct_full_matrix(residuals[s, :], fc_size)

    # Create a 1x3 subplot
    fig, axes = plt.subplots(1, 3, figsize=(18, 6))
    
    # Plot the original FC matrix
    sns.heatmap(original_fc, cmap='viridis', ax=axes[0])
    axes[0].set_title(f'Session {ses_index[s]}: Original FC', fontsize=14)
    
    # Plot the reconstructed FC matrix
    sns.heatmap(reconstructed_fc, cmap='viridis', ax=axes[1])
    axes[1].set_title(f'Session {ses_index[s]}: Reconstructed FC', fontsize=14)
    
    # Plot the residual matrix
    sns.heatmap(residual_fc, cmap='viridis', ax=axes[2])
    axes[2].set_title(f'Session {ses_index[s]}: Residual', fontsize=14)
    
    # Adjust layout and display the plot
    plt.tight_layout()
    plt.show()

Plot the variance of the residuals over sessions¶

In [16]:

Copied!





def plot_res_variance(residuals, groupedby=False, min_res_var=0, max_res_var=1000, ses_id=None):
    from scipy.optimize import curve_fit
    from sklearn.metrics import r2_score
    
    # Compute the variance of the residuals for each session
    residuals_var = np.var(residuals, axis=1)
    min_res_var = min(min_res_var, min(residuals_var))
    max_res_var = max(max_res_var, max(residuals_var))

    # Plot the variance of the residuals as a function of the session index
    plt.plot(range(len(residuals_var)), residuals_var, marker='o', linestyle='-', label='Variance of Residuals')

    # Fit and plot a negative exponential curve
    def neg_exp(x, a, b, c):
        return a * np.exp(-b * x) + c

    if residuals_var.shape[0] > 1:
        # Fit and plot a linear curve
        linear_model = np.poly1d(np.polyfit(range(len(residuals_var)), residuals_var, 1))
        linear_r2 = r2_score(residuals_var, linear_model(range(len(residuals_var))))
        plt.plot(range(len(residuals_var)), linear_model(range(len(residuals_var))), linestyle='--', label=f'Linear Fit (R$^2$={linear_r2:.2f})')

    if residuals_var.shape[0] > 3:
        #popt, _ = curve_fit(neg_exp, range(len(residuals_var)), residuals_var, p0=(1, 0.01, 1))
        #neg_exp_r2 = r2_score(residuals_var, neg_exp(range(len(residuals_var)), *popt))
        #plt.plot(range(len(residuals_var)), neg_exp(range(len(residuals_var)), *popt), linestyle='--', label=f'Negative Exponential Fit (R$^2$={neg_exp_r2:.2f})')
        
        # Fit and plot a quadratic curve
        quadratic_model = np.poly1d(np.polyfit(range(len(residuals_var)), residuals_var, 2))
        quadratic_r2 = r2_score(residuals_var, quadratic_model(range(len(residuals_var))))
        plt.plot(range(len(residuals_var)), quadratic_model(range(len(residuals_var))), linestyle='--', label=f'Quadratic Fit (R$^2$={quadratic_r2:.2f})')
        
    if ses_id is not None:
        plt.xticks(ticks=np.arange(len(ses_id)), labels=ses_id)
    plt.tick_params(axis='x', labelsize=12)
    plt.tick_params(axis='y', labelsize=12)
    plt.legend()
    if groupedby:
        plt.title(groupedby, fontsize=16)
        
    return min_res_var, max_res_var
   
def plot_res_variance(residuals, groupedby=False, min_res_var=0, max_res_var=1000, ses_id=None):
    from scipy.optimize import curve_fit
    from sklearn.metrics import r2_score
    
    # Compute the variance of the residuals for each session
    residuals_var = np.var(residuals, axis=1)
    min_res_var = min(min_res_var, min(residuals_var))
    max_res_var = max(max_res_var, max(residuals_var))

    # Plot the variance of the residuals as a function of the session index
    plt.plot(range(len(residuals_var)), residuals_var, marker='o', linestyle='-', label='Variance of Residuals')

    # Fit and plot a negative exponential curve
    def neg_exp(x, a, b, c):
        return a * np.exp(-b * x) + c

    if residuals_var.shape[0] > 1:
        # Fit and plot a linear curve
        linear_model = np.poly1d(np.polyfit(range(len(residuals_var)), residuals_var, 1))
        linear_r2 = r2_score(residuals_var, linear_model(range(len(residuals_var))))
        plt.plot(range(len(residuals_var)), linear_model(range(len(residuals_var))), linestyle='--', label=f'Linear Fit (R$^2$={linear_r2:.2f})')

    if residuals_var.shape[0] > 3:
        #popt, _ = curve_fit(neg_exp, range(len(residuals_var)), residuals_var, p0=(1, 0.01, 1))
        #neg_exp_r2 = r2_score(residuals_var, neg_exp(range(len(residuals_var)), *popt))
        #plt.plot(range(len(residuals_var)), neg_exp(range(len(residuals_var)), *popt), linestyle='--', label=f'Negative Exponential Fit (R$^2$={neg_exp_r2:.2f})')
        
        # Fit and plot a quadratic curve
        quadratic_model = np.poly1d(np.polyfit(range(len(residuals_var)), residuals_var, 2))
        quadratic_r2 = r2_score(residuals_var, quadratic_model(range(len(residuals_var))))
        plt.plot(range(len(residuals_var)), quadratic_model(range(len(residuals_var))), linestyle='--', label=f'Quadratic Fit (R$^2$={quadratic_r2:.2f})')
        
    if ses_id is not None:
        plt.xticks(ticks=np.arange(len(ses_id)), labels=ses_id)
    plt.tick_params(axis='x', labelsize=12)
    plt.tick_params(axis='y', labelsize=12)
    plt.legend()
    if groupedby:
        plt.title(groupedby, fontsize=16)
        
    return min_res_var, max_res_var
   

In [17]:

Copied!





# Exclude 30th column
#residuals_wo_outlier = np.delete(residuals, 30, axis=1)

plt.figure(figsize=(10, 6))

# Plot with residuals
#plt.subplot(1, 2, 1)
plot_res_variance(residuals)
plt.xticks(ticks=np.arange(len(ses_index)), labels=ses_index, rotation=90)
plt.xlabel('Session Index', fontsize=16)
plt.ylabel('Variance of Residuals', fontsize=16)
#plt.title('All Sessions', fontsize=16)
#plt.ylim(0.06, 0.24)

"""
# Originally in the abstract I had investigated the effect of excluding the outlier session on the variance of the residuals.
# Plot with residuals_wo_outlier
plt.subplot(1, 2, 2)
plot_res_variance(residuals_wo_outlier)
#plot_res_variance(residuals)
plt.xticks(ticks=np.arange(len(ses_index)-1), labels=np.delete(ses_index, 30), rotation=90)
plt.xlabel('Session Index', fontsize=14)
plt.ylabel('Variance of Residuals', fontsize=14)
plt.title('Excluding the Outlier', fontsize=16)
#plt.ylim(0.06, 0.24)
"""


plt.tight_layout()
plt.show()
# Exclude 30th column
#residuals_wo_outlier = np.delete(residuals, 30, axis=1)

plt.figure(figsize=(10, 6))

# Plot with residuals
#plt.subplot(1, 2, 1)
plot_res_variance(residuals)
plt.xticks(ticks=np.arange(len(ses_index)), labels=ses_index, rotation=90)
plt.xlabel('Session Index', fontsize=16)
plt.ylabel('Variance of Residuals', fontsize=16)
#plt.title('All Sessions', fontsize=16)
#plt.ylim(0.06, 0.24)

"""
# Originally in the abstract I had investigated the effect of excluding the outlier session on the variance of the residuals.
# Plot with residuals_wo_outlier
plt.subplot(1, 2, 2)
plot_res_variance(residuals_wo_outlier)
#plot_res_variance(residuals)
plt.xticks(ticks=np.arange(len(ses_index)-1), labels=np.delete(ses_index, 30), rotation=90)
plt.xlabel('Session Index', fontsize=14)
plt.ylabel('Variance of Residuals', fontsize=14)
plt.title('Excluding the Outlier', fontsize=16)
#plt.ylim(0.06, 0.24)
"""


plt.tight_layout()
plt.show()

Plot edgewise residual over sessions¶

In [18]:

Copied!





def plot_edgewise_residual(residuals, groupedby=False, ses_id=None):
    # Calculate the interquartile range (IQR)
    q1 = np.percentile(residuals, 25, axis=1)
    q3 = np.percentile(residuals, 75, axis=1)

    # Plot the evolution of the residual for each edge
    for i in range(residuals.shape[1]):
        plt.plot(range(residuals.shape[0]), residuals[:,i], color='blue', alpha=0.1)

    # Plot the inter-quartile
    plt.plot(range(residuals.shape[0]), q1, color='orange', label='25th percentile')
    plt.plot(range(residuals.shape[0]), q3, color='orange', label='75th percentile')
    if ses_id is not None:
        plt.xticks(ticks=np.arange(len(ses_id)), labels=ses_id)
    plt.tick_params(axis='x', labelsize=12)
    plt.tick_params(axis='y', labelsize=12)
    plt.legend()
    if groupedby:
        plt.title(groupedby, fontsize=16)
def plot_edgewise_residual(residuals, groupedby=False, ses_id=None):
    # Calculate the interquartile range (IQR)
    q1 = np.percentile(residuals, 25, axis=1)
    q3 = np.percentile(residuals, 75, axis=1)

    # Plot the evolution of the residual for each edge
    for i in range(residuals.shape[1]):
        plt.plot(range(residuals.shape[0]), residuals[:,i], color='blue', alpha=0.1)

    # Plot the inter-quartile
    plt.plot(range(residuals.shape[0]), q1, color='orange', label='25th percentile')
    plt.plot(range(residuals.shape[0]), q3, color='orange', label='75th percentile')
    if ses_id is not None:
        plt.xticks(ticks=np.arange(len(ses_id)), labels=ses_id)
    plt.tick_params(axis='x', labelsize=12)
    plt.tick_params(axis='y', labelsize=12)
    plt.legend()
    if groupedby:
        plt.title(groupedby, fontsize=16)

In [19]:

Copied!





plot_edgewise_residual(residuals)
plt.xlabel('Session Index', fontsize=14)
plt.ylabel('Residual Value', fontsize=14)
plt.show()
plot_edgewise_residual(residuals)
plt.xlabel('Session Index', fontsize=14)
plt.ylabel('Residual Value', fontsize=14)
plt.show()

Retrieve confounds¶

Retrieve scan time and day of week from the `scans.tsv` in the dataset repo¶

In [20]:

Copied!





# Retrieve the confounds from the scans.tsv
confounds_of_interest = [
    'caffeine_2h', 
    'caffeine_24h'
]

scans_df = get_confounds(dataset_path, confound_path, confounds_of_interest, iqms_path=iqms_path)

# Keep only the magnitude part of functional scans and first echo
scans_df = scans_df[scans_df["modality"] == "func"]

# Create dataframe based on the list of files
files_df = pd.DataFrame(files, columns=['file_path'])
files_df = files_df.assign(
    subject=files_df["file_path"].str.extract(r"sub-(\d+)/"),
    session=files_df["file_path"].str.extract(r"ses-(\w+)/"),
    task=files_df["file_path"].str.extract(r"task-(\w+)_"),
)

#Pair filenames with acquisition time
confounds_df = pd.merge(
        scans_df, files_df, on=["subject", "session", "task"], how="inner"
    )
confounds_df.drop(columns=["file_path"], inplace=True)

print(confounds_df.head())
# Retrieve the confounds from the scans.tsv
confounds_of_interest = [
    'caffeine_2h', 
    'caffeine_24h'
]

scans_df = get_confounds(dataset_path, confound_path, confounds_of_interest, iqms_path=iqms_path)

# Keep only the magnitude part of functional scans and first echo
scans_df = scans_df[scans_df["modality"] == "func"]

# Create dataframe based on the list of files
files_df = pd.DataFrame(files, columns=['file_path'])
files_df = files_df.assign(
    subject=files_df["file_path"].str.extract(r"sub-(\d+)/"),
    session=files_df["file_path"].str.extract(r"ses-(\w+)/"),
    task=files_df["file_path"].str.extract(r"task-(\w+)_"),
)

#Pair filenames with acquisition time
confounds_df = pd.merge(
        scans_df, files_df, on=["subject", "session", "task"], how="inner"
    )
confounds_df.drop(columns=["file_path"], inplace=True)

print(confounds_df.head())

Warning: Only the IQMs corresponding to the second echo are retained in the IQMs DataFrame.
                                            filename  \
0  sub-001/ses-001/func/sub-001_ses-001_task-rsmo...   
1  sub-001/ses-003/func/sub-001_ses-003_task-rsmo...   
2  sub-001/ses-004/func/sub-001_ses-004_task-rsmo...   
3  sub-001/ses-005/func/sub-001_ses-005_task-rsmo...   
4  sub-001/ses-006/func/sub-001_ses-006_task-rsmo...   

                     acq_time subject session modality     task pe_dir  \
0  2023-10-20T19:50:01.187500     001     001     func  rsmovie     LR   
1  2023-10-21T10:12:32.207500     001     003     func  rsmovie     LR   
2  2023-10-21T12:22:08.222500     001     004     func  rsmovie     RL   
3  2023-10-22T10:33:53.380000     001     005     func  rsmovie     PA   
4  2023-10-22T12:18:06.200000     001     006     func  rsmovie     PA   

  day_of_week time_of_day   fd_mean  caffeine_2h  caffeine_24h  
0      Friday    20:00:00  0.119245          0.0           2.0  
1    Saturday    10:00:00  0.096035          1.0           1.0  
2    Saturday    12:00:00  0.146882          0.0           1.0  
3      Sunday    11:00:00  0.112293          1.0           1.0  
4      Sunday    12:00:00  0.088701          1.0           1.0

In [21]:

Copied!





# Merge residuals with confounds_df to get the time_of_day for each session
residuals_df = pd.DataFrame(residuals)
print("Shape of residuals array (n_samples, n_features):", residuals.shape)
residuals_df = residuals_df.assign(
    session = files_df['session'],
)
residuals_df = pd.merge(
    residuals_df, 
    confounds_df[['session', 'day_of_week', 'time_of_day', 'pe_dir', 'caffeine_2h', 'caffeine_24h']], 
    on='session'
)
print("Shape of residuals_df after merging with confounds_df:", residuals_df.shape)
print("First five rows of residuals_df after merge:")
print(residuals_df.head())
# Merge residuals with confounds_df to get the time_of_day for each session
residuals_df = pd.DataFrame(residuals)
print("Shape of residuals array (n_samples, n_features):", residuals.shape)
residuals_df = residuals_df.assign(
    session = files_df['session'],
)
residuals_df = pd.merge(
    residuals_df, 
    confounds_df[['session', 'day_of_week', 'time_of_day', 'pe_dir', 'caffeine_2h', 'caffeine_24h']], 
    on='session'
)
print("Shape of residuals_df after merging with confounds_df:", residuals_df.shape)
print("First five rows of residuals_df after merge:")
print(residuals_df.head())

Shape of residuals array (n_samples, n_features): (36, 7140)
Shape of residuals_df after merging with confounds_df: (36, 7146)
First five rows of residuals_df after merge:
              0         1         2         3         4         5         6  \
0 -1.731915e-15  0.008568 -0.014353 -0.080859 -0.002130 -0.012745 -0.011353   
1 -1.081627e-15  0.001091  0.020877 -0.020616  0.032127  0.067218  0.034650   
2 -8.339862e-16  0.016150 -0.056996 -0.079078 -0.110628 -0.012115  0.010908   
3  8.614996e-17  0.013174 -0.056856 -0.045357  0.031062  0.009405 -0.018774   
4 -8.310795e-16 -0.030164  0.134168  0.060146  0.034667  0.028640  0.026211   

          7         8         9  ...      7136  7137      7138  7139  session  \
0  0.002458  0.055776  0.125133  ...  0.026551   0.0  0.031856   0.0      001   
1  0.029521 -0.033804 -0.107729  ...  0.031296   0.0  0.035877   0.0      003   
2  0.011550  0.001944  0.015197  ...  0.000338   0.0 -0.024277   0.0      004   
3 -0.024694  0.006120 -0.001600  ... -0.009069   0.0  0.026941   0.0      005   
4  0.040799 -0.000657 -0.049267  ... -0.041234   0.0 -0.006108   0.0      006   

   day_of_week  time_of_day  pe_dir  caffeine_2h  caffeine_24h  
0       Friday     20:00:00      LR          0.0           2.0  
1     Saturday     10:00:00      LR          1.0           1.0  
2     Saturday     12:00:00      RL          0.0           1.0  
3       Sunday     11:00:00      PA          1.0           1.0  
4       Sunday     12:00:00      PA          1.0           1.0  

[5 rows x 7146 columns]

In [22]:

Copied!





res = residuals_df.drop(columns=['session', 'day_of_week', 'time_of_day','pe_dir']).values.T
residuals_df = residuals_df.assign(
    variance = np.var(res, axis=0)
)
# Cast strings to type compatible with statistical models
residuals_df["session"] = pd.to_numeric(residuals_df["session"])
residuals_df["caffeine_2h"] = pd.to_numeric(residuals_df["caffeine_2h"])
residuals_df["caffeine_24h"] = pd.to_numeric(residuals_df["caffeine_24h"])
residuals_df["day_of_week"] = residuals_df["day_of_week"].astype("category")
residuals_df["time_of_day"] = residuals_df["time_of_day"].astype("category")
residuals_df["pe_dir"] = residuals_df["pe_dir"].astype("category")
print(residuals_df.head())

print("Here are the characteristics of the session with the highest variance:")
max_variance_row = residuals_df.loc[residuals_df['variance'].idxmax()]
print(max_variance_row)
residuals_wo_outlier_df = residuals_df.drop(max_variance_row.name)
res = residuals_df.drop(columns=['session', 'day_of_week', 'time_of_day','pe_dir']).values.T
residuals_df = residuals_df.assign(
    variance = np.var(res, axis=0)
)
# Cast strings to type compatible with statistical models
residuals_df["session"] = pd.to_numeric(residuals_df["session"])
residuals_df["caffeine_2h"] = pd.to_numeric(residuals_df["caffeine_2h"])
residuals_df["caffeine_24h"] = pd.to_numeric(residuals_df["caffeine_24h"])
residuals_df["day_of_week"] = residuals_df["day_of_week"].astype("category")
residuals_df["time_of_day"] = residuals_df["time_of_day"].astype("category")
residuals_df["pe_dir"] = residuals_df["pe_dir"].astype("category")
print(residuals_df.head())

print("Here are the characteristics of the session with the highest variance:")
max_variance_row = residuals_df.loc[residuals_df['variance'].idxmax()]
print(max_variance_row)
residuals_wo_outlier_df = residuals_df.drop(max_variance_row.name)

              0         1         2         3         4         5         6  \
0 -1.731915e-15  0.008568 -0.014353 -0.080859 -0.002130 -0.012745 -0.011353   
1 -1.081627e-15  0.001091  0.020877 -0.020616  0.032127  0.067218  0.034650   
2 -8.339862e-16  0.016150 -0.056996 -0.079078 -0.110628 -0.012115  0.010908   
3  8.614996e-17  0.013174 -0.056856 -0.045357  0.031062  0.009405 -0.018774   
4 -8.310795e-16 -0.030164  0.134168  0.060146  0.034667  0.028640  0.026211   

          7         8         9  ...  7137      7138  7139  session  \
0  0.002458  0.055776  0.125133  ...   0.0  0.031856   0.0        1   
1  0.029521 -0.033804 -0.107729  ...   0.0  0.035877   0.0        3   
2  0.011550  0.001944  0.015197  ...   0.0 -0.024277   0.0        4   
3 -0.024694  0.006120 -0.001600  ...   0.0  0.026941   0.0        5   
4  0.040799 -0.000657 -0.049267  ...   0.0 -0.006108   0.0        6   

   day_of_week  time_of_day  pe_dir  caffeine_2h  caffeine_24h  variance  
0       Friday     20:00:00      LR          0.0           2.0  0.002463  
1     Saturday     10:00:00      LR          1.0           1.0  0.002472  
2     Saturday     12:00:00      RL          0.0           1.0  0.002126  
3       Sunday     11:00:00      PA          1.0           1.0  0.001782  
4       Sunday     12:00:00      PA          1.0           1.0  0.002011  

[5 rows x 7147 columns]
Here are the characteristics of the session with the highest variance:
0                    0.0
1                -0.0818
2               0.034087
3              -0.179126
4              -0.102308
                  ...   
time_of_day     20:00:00
pe_dir                RL
caffeine_2h          0.0
caffeine_24h         2.0
variance        0.004153
Name: 21, Length: 7147, dtype: object

Test habituation significance¶

In [23]:

Copied!





# Fit the linear mixed effects model
X = sm.add_constant(residuals_df["session"]) # Adds the intercept term
y = residuals_df["variance"]
model = sm.OLS(y, X).fit()
print(model.summary())
# Fit the linear mixed effects model
X = sm.add_constant(residuals_df["session"]) # Adds the intercept term
y = residuals_df["variance"]
model = sm.OLS(y, X).fit()
print(model.summary())

                            OLS Regression Results                            
==============================================================================
Dep. Variable:               variance   R-squared:                       0.016
Model:                            OLS   Adj. R-squared:                 -0.013
Method:                 Least Squares   F-statistic:                    0.5514
Date:                Fri, 10 Apr 2026   Prob (F-statistic):              0.463
Time:                        20:21:40   Log-Likelihood:                 217.20
No. Observations:                  36   AIC:                            -430.4
Df Residuals:                      34   BIC:                            -427.2
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0027      0.000     14.080      0.000       0.002       0.003
session     4.997e-06   6.73e-06      0.743      0.463   -8.68e-06    1.87e-05
==============================================================================
Omnibus:                        1.571   Durbin-Watson:                   1.210
Prob(Omnibus):                  0.456   Jarque-Bera (JB):                1.178
Skew:                           0.441   Prob(JB):                        0.555
Kurtosis:                       2.906   Cond. No.                         53.5
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

In [24]:

Copied!





# #Statistics excluding the outlier
# # Fit the linear mixed effects model
# X = sm.add_constant(residuals_wo_outlier_df["session"]) # Adds the intercept term
# y = residuals_wo_outlier_df["variance"]
# model_wo_outlier = sm.OLS(y, X).fit()
# print(model_wo_outlier.summary())
# #Statistics excluding the outlier
# # Fit the linear mixed effects model
# X = sm.add_constant(residuals_wo_outlier_df["session"]) # Adds the intercept term
# y = residuals_wo_outlier_df["variance"]
# model_wo_outlier = sm.OLS(y, X).fit()
# print(model_wo_outlier.summary())

In [25]:

Copied!

# from statsmodels.stats.multitest import multipletests

# # Assuming you have a list of p-values from multiple tests
# p_values = [model.pvalues["session"], model_wo_outlier.pvalues["session"]]

# # Apply Bonferroni correction
# corrected_p_values = multipletests(p_values, method='bonferroni')[1]

# print(f"Original p-values: {p_values}")
# print(f"Corrected p-values: {corrected_p_values}")
# from statsmodels.stats.multitest import multipletests

# # Assuming you have a list of p-values from multiple tests
# p_values = [model.pvalues["session"], model_wo_outlier.pvalues["session"]]

# # Apply Bonferroni correction
# corrected_p_values = multipletests(p_values, method='bonferroni')[1]

# print(f"Original p-values: {p_values}")
# print(f"Corrected p-values: {corrected_p_values}")

Test habituation significance when accounting for confounds of no interest¶

Based on the interaction plots below, we determined that three out of the five confounds of interest we investigated seem to have an influence on the residual evolution. As such, let us add day of week, coffee consumption in the 24h prior to acquisition and phase encoding direction as nuisance variable in the OLS.

In [26]:

Copied!





# Add day_of_week as a nuisance variable (include it as a covariate, one-hot encoded)
X = pd.get_dummies(residuals_df[["session", "day_of_week"]], drop_first=True)
X = sm.add_constant(X)
# Convert all data to float64 to avoid dtype=object errors
X = X.astype('float64')
y = pd.to_numeric(residuals_df["variance"], errors='coerce')
y = y.astype('float64')
model = sm.OLS(y, X).fit()
print(model.summary())
# Add day_of_week as a nuisance variable (include it as a covariate, one-hot encoded)
X = pd.get_dummies(residuals_df[["session", "day_of_week"]], drop_first=True)
X = sm.add_constant(X)
# Convert all data to float64 to avoid dtype=object errors
X = X.astype('float64')
y = pd.to_numeric(residuals_df["variance"], errors='coerce')
y = y.astype('float64')
model = sm.OLS(y, X).fit()
print(model.summary())

                            OLS Regression Results                            
==============================================================================
Dep. Variable:               variance   R-squared:                       0.156
Model:                            OLS   Adj. R-squared:                 -0.055
Method:                 Least Squares   F-statistic:                    0.7412
Date:                Fri, 10 Apr 2026   Prob (F-statistic):              0.639
Time:                        20:21:40   Log-Likelihood:                 219.97
No. Observations:                  36   AIC:                            -423.9
Df Residuals:                      28   BIC:                            -411.3
Df Model:                           7                                         
Covariance Type:            nonrobust                                         
=========================================================================================
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
const                     0.0029      0.000      9.187      0.000       0.002       0.004
session                4.848e-06   6.99e-06      0.694      0.494   -9.47e-06    1.92e-05
day_of_week_Monday    -7.429e-05      0.000     -0.201      0.842      -0.001       0.001
day_of_week_Saturday     -0.0005      0.000     -1.234      0.228      -0.001       0.000
day_of_week_Sunday       -0.0005      0.000     -1.362      0.184      -0.001       0.000
day_of_week_Thursday     -0.0003      0.000     -0.705      0.487      -0.001       0.001
day_of_week_Tuesday      -0.0001      0.000     -0.371      0.713      -0.001       0.001
day_of_week_Wednesday     0.0002      0.000      0.415      0.682      -0.001       0.001
==============================================================================
Omnibus:                        5.252   Durbin-Watson:                   1.404
Prob(Omnibus):                  0.072   Jarque-Bera (JB):                3.974
Skew:                           0.784   Prob(JB):                        0.137
Kurtosis:                       3.440   Cond. No.                         213.
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

In [27]:

Copied!





# Add phase encoding direction (pe_dir) as a nuisance variable (include it as a covariate, one-hot encoded)
X = pd.get_dummies(residuals_df[["session", "pe_dir"]], drop_first=True)
X = sm.add_constant(X)
# Convert all data to float64 to avoid dtype=object errors
X = X.astype('float64')
y = pd.to_numeric(residuals_df["variance"], errors='coerce')
y = y.astype('float64')
model = sm.OLS(y, X).fit()
print(model.summary())
# Add phase encoding direction (pe_dir) as a nuisance variable (include it as a covariate, one-hot encoded)
X = pd.get_dummies(residuals_df[["session", "pe_dir"]], drop_first=True)
X = sm.add_constant(X)
# Convert all data to float64 to avoid dtype=object errors
X = X.astype('float64')
y = pd.to_numeric(residuals_df["variance"], errors='coerce')
y = y.astype('float64')
model = sm.OLS(y, X).fit()
print(model.summary())

                            OLS Regression Results                            
==============================================================================
Dep. Variable:               variance   R-squared:                       0.180
Model:                            OLS   Adj. R-squared:                  0.074
Method:                 Least Squares   F-statistic:                     1.701
Date:                Fri, 10 Apr 2026   Prob (F-statistic):              0.175
Time:                        20:21:40   Log-Likelihood:                 220.48
No. Observations:                  36   AIC:                            -431.0
Df Residuals:                      31   BIC:                            -423.1
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0025      0.000     10.045      0.000       0.002       0.003
session     6.774e-06   6.53e-06      1.037      0.308   -6.55e-06    2.01e-05
pe_dir_LR   9.676e-05      0.000      0.360      0.722      -0.000       0.001
pe_dir_PA     -0.0002      0.000     -0.737      0.467      -0.001       0.000
pe_dir_RL      0.0005      0.000      1.690      0.101   -9.49e-05       0.001
==============================================================================
Omnibus:                        0.781   Durbin-Watson:                   1.125
Prob(Omnibus):                  0.677   Jarque-Bera (JB):                0.832
Skew:                           0.211   Prob(JB):                        0.660
Kurtosis:                       2.387   Cond. No.                         126.
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

In [28]:

Copied!





# Add caffeine_24h as a nuisance variable (include it as a covariate, one-hot encoded)
X = pd.get_dummies(residuals_df[["session", "caffeine_24h"]], drop_first=True)
X = sm.add_constant(X)
# Convert all data to float64 to avoid dtype=object errors
X = X.astype('float64')
y = pd.to_numeric(residuals_df["variance"], errors='coerce')
y = y.astype('float64')
model = sm.OLS(y, X).fit()
print(model.summary())
# Add caffeine_24h as a nuisance variable (include it as a covariate, one-hot encoded)
X = pd.get_dummies(residuals_df[["session", "caffeine_24h"]], drop_first=True)
X = sm.add_constant(X)
# Convert all data to float64 to avoid dtype=object errors
X = X.astype('float64')
y = pd.to_numeric(residuals_df["variance"], errors='coerce')
y = y.astype('float64')
model = sm.OLS(y, X).fit()
print(model.summary())

                            OLS Regression Results                            
==============================================================================
Dep. Variable:               variance   R-squared:                       0.370
Model:                            OLS   Adj. R-squared:                  0.332
Method:                 Least Squares   F-statistic:                     9.700
Date:                Fri, 10 Apr 2026   Prob (F-statistic):           0.000486
Time:                        20:21:40   Log-Likelihood:                 225.24
No. Observations:                  36   AIC:                            -444.5
Df Residuals:                      33   BIC:                            -439.7
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
================================================================================
                   coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------
const            0.0012      0.000      3.147      0.003       0.000       0.002
session       4.643e-06   5.47e-06      0.849      0.402   -6.48e-06    1.58e-05
caffeine_24h     0.0008      0.000      4.309      0.000       0.000       0.001
==============================================================================
Omnibus:                        4.380   Durbin-Watson:                   1.914
Prob(Omnibus):                  0.112   Jarque-Bera (JB):                3.383
Skew:                           0.744   Prob(JB):                        0.184
Kurtosis:                       3.200   Cond. No.                         146.
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

In [29]:

Copied!





# Add caffeine_24h, pe_dir, and day_of_week as nuisance variables (all one-hot encoded)
nuisance_vars = ["caffeine_24h", "pe_dir", "day_of_week"]
X = pd.get_dummies(residuals_df[["session"] + nuisance_vars], drop_first=True)
X = sm.add_constant(X)
# Convert all data to float64 to avoid dtype=object errors
X = X.astype('float64')
y = pd.to_numeric(residuals_df["variance"], errors='coerce')
y = y.astype('float64')
model = sm.OLS(y, X).fit()
print(model.summary())
# Add caffeine_24h, pe_dir, and day_of_week as nuisance variables (all one-hot encoded)
nuisance_vars = ["caffeine_24h", "pe_dir", "day_of_week"]
X = pd.get_dummies(residuals_df[["session"] + nuisance_vars], drop_first=True)
X = sm.add_constant(X)
# Convert all data to float64 to avoid dtype=object errors
X = X.astype('float64')
y = pd.to_numeric(residuals_df["variance"], errors='coerce')
y = y.astype('float64')
model = sm.OLS(y, X).fit()
print(model.summary())

                            OLS Regression Results                            
==============================================================================
Dep. Variable:               variance   R-squared:                       0.553
Model:                            OLS   Adj. R-squared:                  0.348
Method:                 Least Squares   F-statistic:                     2.697
Date:                Fri, 10 Apr 2026   Prob (F-statistic):             0.0204
Time:                        20:21:40   Log-Likelihood:                 231.40
No. Observations:                  36   AIC:                            -438.8
Df Residuals:                      24   BIC:                            -419.8
Df Model:                          11                                         
Covariance Type:            nonrobust                                         
=========================================================================================
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
const                     0.0004      0.001      0.603      0.552      -0.001       0.002
session                8.417e-06   5.62e-06      1.497      0.147   -3.19e-06       2e-05
caffeine_24h              0.0011      0.000      3.844      0.001       0.001       0.002
pe_dir_LR              8.816e-05      0.000      0.355      0.726      -0.000       0.001
pe_dir_PA                -0.0002      0.000     -0.769      0.449      -0.001       0.000
pe_dir_RL                 0.0004      0.000      1.547      0.135      -0.000       0.001
day_of_week_Monday       -0.0001      0.000     -0.479      0.636      -0.001       0.000
day_of_week_Saturday      0.0004      0.000      1.092      0.286      -0.000       0.001
day_of_week_Sunday        0.0004      0.000      1.046      0.306      -0.000       0.001
day_of_week_Thursday     -0.0002      0.000     -0.640      0.528      -0.001       0.000
day_of_week_Tuesday      -0.0001      0.000     -0.444      0.661      -0.001       0.000
day_of_week_Wednesday     0.0002      0.000      0.667      0.511      -0.001       0.001
==============================================================================
Omnibus:                        0.477   Durbin-Watson:                   1.731
Prob(Omnibus):                  0.788   Jarque-Bera (JB):                0.403
Skew:                           0.240   Prob(JB):                        0.817
Kurtosis:                       2.803   Cond. No.                         297.
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

Accounting for confounds of no interest still indicates that the residual variance modestly and non-significantly increase over movie repetition. As such, it does not change our conclusion that we did not find conclusive evidence of an habituation effect.

Plot residuals grouped by bins of confound factor¶

Day of week¶

In [30]:

Copied!





# Entities specific to this plot
entities = {'visualization': 'resvar', 'confound': 'day'}
entities.update(entities_base)

# Create figure with subplots for variance and edgewise residuals
fig, axes = plt.subplots(3, 3, figsize=(20, 20))
min_res_var = 1000
max_res_var = 0
for i, (day_of_week, group) in enumerate(residuals_df.groupby('day_of_week', observed=False)):
    plt.sca(axes[i // 3, i % 3])
    ses_id = group['session'].apply(lambda x: str(x).zfill(3)).values
    res = group.drop(columns=['session', 'day_of_week', 'time_of_day','pe_dir','caffeine_2h', 'caffeine_24h']).values
    min_res_var, max_res_var = plot_res_variance(res, groupedby=day_of_week, min_res_var=min_res_var, max_res_var=max_res_var, ses_id=ses_id)

#Set same y-axis limits for all subplots
for ax in axes.flat:
    ax.set_ylim(min_res_var-0.0005, max_res_var+0.0005)
    
# Hide the last two subplots (axes[2, 1] and axes[2, 2])
axes[2, 1].axis('off')
axes[2, 2].axis('off')
fig.text(0.5, 0.08, 'Session Index', ha='center', fontsize=14)
fig.text(0.08, 0.5, 'Variance of Residuals', va='center', rotation='vertical', fontsize=14)
save_path = save_layout.build_path(entities, validate=False)
plt.savefig(save_path)
plt.show()

caption = f"**Residual variance evolution separated by day of week.**" \
"We fitted negative exponential and quadratic curves only if the plot had more than three points."
save_caption(caption, save_layout, entities)
# Entities specific to this plot
entities = {'visualization': 'resvar', 'confound': 'day'}
entities.update(entities_base)

# Create figure with subplots for variance and edgewise residuals
fig, axes = plt.subplots(3, 3, figsize=(20, 20))
min_res_var = 1000
max_res_var = 0
for i, (day_of_week, group) in enumerate(residuals_df.groupby('day_of_week', observed=False)):
    plt.sca(axes[i // 3, i % 3])
    ses_id = group['session'].apply(lambda x: str(x).zfill(3)).values
    res = group.drop(columns=['session', 'day_of_week', 'time_of_day','pe_dir','caffeine_2h', 'caffeine_24h']).values
    min_res_var, max_res_var = plot_res_variance(res, groupedby=day_of_week, min_res_var=min_res_var, max_res_var=max_res_var, ses_id=ses_id)

#Set same y-axis limits for all subplots
for ax in axes.flat:
    ax.set_ylim(min_res_var-0.0005, max_res_var+0.0005)
    
# Hide the last two subplots (axes[2, 1] and axes[2, 2])
axes[2, 1].axis('off')
axes[2, 2].axis('off')
fig.text(0.5, 0.08, 'Session Index', ha='center', fontsize=14)
fig.text(0.08, 0.5, 'Variance of Residuals', va='center', rotation='vertical', fontsize=14)
save_path = save_layout.build_path(entities, validate=False)
plt.savefig(save_path)
plt.show()

caption = f"**Residual variance evolution separated by day of week.**" \
"We fitted negative exponential and quadratic curves only if the plot had more than three points."
save_caption(caption, save_layout, entities)

Based on the interaction plot, day of week seems to influence the residuals evolution, as such we will add it the OLS as a nuisance regressor.

Time of day¶

In [31]:

Copied!





# Entities specific to this plot
entities = {'visualization': 'resvar', 'confound': 'time'}
entities.update(entities_base)

# Create figure with subplots for variance and edgewise residuals
fig, axes = plt.subplots(3, 4, figsize=(30, 20))
min_res_var = 1000
max_res_var = 0
for i, (time_of_day, group) in enumerate(residuals_df.groupby('time_of_day', observed=False)):
    print("Time of day:", time_of_day)
    plt.sca(axes[i // 4, i % 4])
    ses_id = group['session'].apply(lambda x: str(x).zfill(3)).values
    res = group.drop(columns=['session', 'day_of_week', 'time_of_day','pe_dir','caffeine_2h', 'caffeine_24h']).values
    print("Shape of residuals:", res.shape)
    if res.shape[1] > 1:
        min_res_var, max_res_var = plot_res_variance(res, groupedby=time_of_day, min_res_var=min_res_var, max_res_var=max_res_var, ses_id=ses_id)

#Set same y-axis limits for all subplots
for ax in axes.flat:
    ax.set_ylim(min_res_var-0.01, max_res_var+0.01)

 # Hide the last axis
axes[2, 2].axis('off')
axes[2, 3].axis('off')
fig.text(0.5, 0.08, 'Session Index', ha='center', fontsize=14)
fig.text(0.08, 0.5, 'Variance of Residuals', va='center', rotation='vertical', fontsize=14)
save_path = save_layout.build_path(entities, validate=False)
plt.savefig(save_path)
plt.show()

caption = f"**Residual variance evolution separated by time of day.**"
save_caption(caption, save_layout, entities)
# Entities specific to this plot
entities = {'visualization': 'resvar', 'confound': 'time'}
entities.update(entities_base)

# Create figure with subplots for variance and edgewise residuals
fig, axes = plt.subplots(3, 4, figsize=(30, 20))
min_res_var = 1000
max_res_var = 0
for i, (time_of_day, group) in enumerate(residuals_df.groupby('time_of_day', observed=False)):
    print("Time of day:", time_of_day)
    plt.sca(axes[i // 4, i % 4])
    ses_id = group['session'].apply(lambda x: str(x).zfill(3)).values
    res = group.drop(columns=['session', 'day_of_week', 'time_of_day','pe_dir','caffeine_2h', 'caffeine_24h']).values
    print("Shape of residuals:", res.shape)
    if res.shape[1] > 1:
        min_res_var, max_res_var = plot_res_variance(res, groupedby=time_of_day, min_res_var=min_res_var, max_res_var=max_res_var, ses_id=ses_id)

#Set same y-axis limits for all subplots
for ax in axes.flat:
    ax.set_ylim(min_res_var-0.01, max_res_var+0.01)

 # Hide the last axis
axes[2, 2].axis('off')
axes[2, 3].axis('off')
fig.text(0.5, 0.08, 'Session Index', ha='center', fontsize=14)
fig.text(0.08, 0.5, 'Variance of Residuals', va='center', rotation='vertical', fontsize=14)
save_path = save_layout.build_path(entities, validate=False)
plt.savefig(save_path)
plt.show()

caption = f"**Residual variance evolution separated by time of day.**"
save_caption(caption, save_layout, entities)

Time of day: 09:00:00
Shape of residuals: (1, 7141)
Time of day: 10:00:00
Shape of residuals: (2, 7141)
Time of day: 11:00:00
Shape of residuals: (2, 7141)
Time of day: 12:00:00
Shape of residuals: (3, 7141)
Time of day: 18:00:00
Shape of residuals: (2, 7141)
Time of day: 19:00:00
Shape of residuals: (2, 7141)
Time of day: 20:00:00
Shape of residuals: (11, 7141)
Time of day: 21:00:00
Shape of residuals: (5, 7141)
Time of day: 22:00:00
Shape of residuals: (7, 7141)
Time of day: 23:00:00
Shape of residuals: (1, 7141)

Based on the interaction plot, time of day does not have a substantial influence on the residuals evolution, as such it is not necessary to add it as a nuisance variable in the OLS.

Phase Encoding Direction¶

In [32]:

Copied!





# Entities specific to this plot
entities = {'visualization': 'resvar', 'confound': 'pe_dir'}
entities.update(entities_base)

# Create figure with subplots for variance and edgewise residuals
fig, axes = plt.subplots(2, 2, figsize=(12, 12))
min_res_var = 1000
max_res_var = 0
for i, (pe_dir, group) in enumerate(residuals_df.groupby('pe_dir', observed=False)):
    plt.sca(axes[i // 2, i % 2])
    ses_id = group['session'].apply(lambda x: str(x).zfill(3)).values
    res = group.drop(columns=['session', 'day_of_week', 'time_of_day','pe_dir','caffeine_2h', 'caffeine_24h']).values
    min_res_var, max_res_var = plot_res_variance(res, groupedby=pe_dir, min_res_var=min_res_var, max_res_var=max_res_var, ses_id=ses_id)

#Set same y-axis limits for all subplots
for ax in axes.flat:
    ax.set_ylim(min_res_var-0.0005, max_res_var+0.0005)

fig.text(0.5, 0.05, 'Session Index', ha='center', fontsize=14)
fig.text(0.05, 0.5, 'Variance of Residuals', va='center', rotation='vertical', fontsize=14)
save_path = save_layout.build_path(entities, validate=False)
plt.savefig(save_path)
plt.show()

caption = f"**Residual variance evolution separated by phase encoding direction.**"
save_caption(caption, save_layout, entities)
# Entities specific to this plot
entities = {'visualization': 'resvar', 'confound': 'pe_dir'}
entities.update(entities_base)

# Create figure with subplots for variance and edgewise residuals
fig, axes = plt.subplots(2, 2, figsize=(12, 12))
min_res_var = 1000
max_res_var = 0
for i, (pe_dir, group) in enumerate(residuals_df.groupby('pe_dir', observed=False)):
    plt.sca(axes[i // 2, i % 2])
    ses_id = group['session'].apply(lambda x: str(x).zfill(3)).values
    res = group.drop(columns=['session', 'day_of_week', 'time_of_day','pe_dir','caffeine_2h', 'caffeine_24h']).values
    min_res_var, max_res_var = plot_res_variance(res, groupedby=pe_dir, min_res_var=min_res_var, max_res_var=max_res_var, ses_id=ses_id)

#Set same y-axis limits for all subplots
for ax in axes.flat:
    ax.set_ylim(min_res_var-0.0005, max_res_var+0.0005)

fig.text(0.5, 0.05, 'Session Index', ha='center', fontsize=14)
fig.text(0.05, 0.5, 'Variance of Residuals', va='center', rotation='vertical', fontsize=14)
save_path = save_layout.build_path(entities, validate=False)
plt.savefig(save_path)
plt.show()

caption = f"**Residual variance evolution separated by phase encoding direction.**"
save_caption(caption, save_layout, entities)

Based on the interaction plot, phase encoding direction seems to influence the residuals evolution, as such we will add it the OLS as a nuisance regressor.

In [33]:

Copied!





# Entities specific to this plot
entities = {'visualization': 'edgewisevar', 'confound': 'pe_dir'}
entities.update(entities_base)

# Create figure with subplots for variance and edgewise residuals
fig, axes = plt.subplots(2, 2, figsize=(12, 12))
min_res_var = 1000
max_res_var = 0
for i, (pe_dir, group) in enumerate(residuals_df.groupby('pe_dir', observed=False)):
    plt.sca(axes[i // 2, i % 2])
    ses_id = group['session'].apply(lambda x: str(x).zfill(3)).values
    res = group.drop(columns=['session', 'day_of_week', 'time_of_day','pe_dir','caffeine_2h', 'caffeine_24h']).values
    plot_edgewise_residual(res, groupedby=pe_dir, ses_id=ses_id)

fig.text(0.5, 0.05, 'Session Index', ha='center', fontsize=14)
fig.text(0.05, 0.5, 'Variance of Residuals', va='center', rotation='vertical', fontsize=14)
save_path = save_layout.build_path(entities, validate=False)
plt.savefig(save_path)
plt.show()
# Entities specific to this plot
entities = {'visualization': 'edgewisevar', 'confound': 'pe_dir'}
entities.update(entities_base)

# Create figure with subplots for variance and edgewise residuals
fig, axes = plt.subplots(2, 2, figsize=(12, 12))
min_res_var = 1000
max_res_var = 0
for i, (pe_dir, group) in enumerate(residuals_df.groupby('pe_dir', observed=False)):
    plt.sca(axes[i // 2, i % 2])
    ses_id = group['session'].apply(lambda x: str(x).zfill(3)).values
    res = group.drop(columns=['session', 'day_of_week', 'time_of_day','pe_dir','caffeine_2h', 'caffeine_24h']).values
    plot_edgewise_residual(res, groupedby=pe_dir, ses_id=ses_id)

fig.text(0.5, 0.05, 'Session Index', ha='center', fontsize=14)
fig.text(0.05, 0.5, 'Variance of Residuals', va='center', rotation='vertical', fontsize=14)
save_path = save_layout.build_path(entities, validate=False)
plt.savefig(save_path)
plt.show()

/tmp/ipykernel_709473/2248055642.py:18: UserWarning: Creating legend with loc="best" can be slow with large amounts of data.
  plt.savefig(save_path)
/home/cprovins/miniconda3/lib/python3.12/site-packages/IPython/core/pylabtools.py:170: UserWarning: Creating legend with loc="best" can be slow with large amounts of data.
  fig.canvas.print_figure(bytes_io, **kw)

Caffeine consumption 2h before¶

In [34]:

Copied!





# Entities specific to this plot
entities = {'visualization': 'resvar', 'confound': 'caffeine2h'}
entities.update(entities_base)

# Create figure with subplots for variance and edgewise residuals
fig, axes = plt.subplots(1, 2, figsize=(21, 6))
min_res_var = 1000
max_res_var = 0
for i, (caf, group) in enumerate(residuals_df.groupby('caffeine_2h')):
    plt.sca(axes[i])
    ses_id = group['session'].apply(lambda x: str(x).zfill(3)).values
    res = group.drop(columns=['session', 'day_of_week', 'time_of_day','pe_dir','caffeine_2h', 'caffeine_24h']).values
    min_res_var, max_res_var = plot_res_variance(res, groupedby=caf, min_res_var=min_res_var, max_res_var=max_res_var, ses_id=ses_id)

#Set same y-axis limits for all subplots
for ax in axes.flat:
    ax.set_ylim(min_res_var-0.0001, max_res_var+0.0001)

# Set x-ticks with 90 degree rotation for the first subplot
axes[0].set_xticklabels(axes[0].get_xticklabels(), rotation=90)
axes[0].set_title("0 cup of coffee")
axes[1].set_title("1 cup of coffee")

fig.text(0.5, 0.05, 'Session Index', ha='center', fontsize=14)
fig.text(0.05, 0.5, 'Variance of Residuals', va='center', rotation='vertical', fontsize=14)
save_path = save_layout.build_path(entities, validate=False)
plt.savefig(save_path)
plt.show()

caption = f"**Residual variance evolution separated by caffeine consumption 2h before.**"
save_caption(caption, save_layout, entities)
# Entities specific to this plot
entities = {'visualization': 'resvar', 'confound': 'caffeine2h'}
entities.update(entities_base)

# Create figure with subplots for variance and edgewise residuals
fig, axes = plt.subplots(1, 2, figsize=(21, 6))
min_res_var = 1000
max_res_var = 0
for i, (caf, group) in enumerate(residuals_df.groupby('caffeine_2h')):
    plt.sca(axes[i])
    ses_id = group['session'].apply(lambda x: str(x).zfill(3)).values
    res = group.drop(columns=['session', 'day_of_week', 'time_of_day','pe_dir','caffeine_2h', 'caffeine_24h']).values
    min_res_var, max_res_var = plot_res_variance(res, groupedby=caf, min_res_var=min_res_var, max_res_var=max_res_var, ses_id=ses_id)

#Set same y-axis limits for all subplots
for ax in axes.flat:
    ax.set_ylim(min_res_var-0.0001, max_res_var+0.0001)

# Set x-ticks with 90 degree rotation for the first subplot
axes[0].set_xticklabels(axes[0].get_xticklabels(), rotation=90)
axes[0].set_title("0 cup of coffee")
axes[1].set_title("1 cup of coffee")

fig.text(0.5, 0.05, 'Session Index', ha='center', fontsize=14)
fig.text(0.05, 0.5, 'Variance of Residuals', va='center', rotation='vertical', fontsize=14)
save_path = save_layout.build_path(entities, validate=False)
plt.savefig(save_path)
plt.show()

caption = f"**Residual variance evolution separated by caffeine consumption 2h before.**"
save_caption(caption, save_layout, entities)

In [35]:

Copied!





# Entities specific to this plot
entities = {'visualization': 'edgewisevar', 'confound': 'caffeine2h'}
entities.update(entities_base)

# Create figure with subplots for variance and edgewise residuals
fig, axes = plt.subplots(1, 2, figsize=(19, 6))
min_res_var = 1000
max_res_var = 0
for i, (caf, group) in enumerate(residuals_df.groupby('caffeine_2h')):
    plt.sca(axes[i])
    ses_id = group['session'].apply(lambda x: str(x).zfill(3)).values
    res = group.drop(columns=['session', 'day_of_week', 'time_of_day','pe_dir','caffeine_2h', 'caffeine_24h']).values
    plot_edgewise_residual(res, groupedby=caf, ses_id= ses_id)

# Set x-ticks with 90 degree rotation for the first subplot
axes[0].set_xticklabels(axes[0].get_xticklabels(), rotation=90)
axes[0].set_title("0 cup of coffee")
axes[1].set_title("1 cup of coffee")

fig.text(0.5, 0.05, 'Session Index', ha='center', fontsize=14)
fig.text(0.05, 0.5, 'Variance of Residuals', va='center', rotation='vertical', fontsize=14)
save_path = save_layout.build_path(entities, validate=False)
plt.savefig(save_path)
plt.show()

caption = f"**Edgewise variance evolution separated by caffeine consumption 2h before.**"
save_caption(caption, save_layout, entities)
# Entities specific to this plot
entities = {'visualization': 'edgewisevar', 'confound': 'caffeine2h'}
entities.update(entities_base)

# Create figure with subplots for variance and edgewise residuals
fig, axes = plt.subplots(1, 2, figsize=(19, 6))
min_res_var = 1000
max_res_var = 0
for i, (caf, group) in enumerate(residuals_df.groupby('caffeine_2h')):
    plt.sca(axes[i])
    ses_id = group['session'].apply(lambda x: str(x).zfill(3)).values
    res = group.drop(columns=['session', 'day_of_week', 'time_of_day','pe_dir','caffeine_2h', 'caffeine_24h']).values
    plot_edgewise_residual(res, groupedby=caf, ses_id= ses_id)

# Set x-ticks with 90 degree rotation for the first subplot
axes[0].set_xticklabels(axes[0].get_xticklabels(), rotation=90)
axes[0].set_title("0 cup of coffee")
axes[1].set_title("1 cup of coffee")

fig.text(0.5, 0.05, 'Session Index', ha='center', fontsize=14)
fig.text(0.05, 0.5, 'Variance of Residuals', va='center', rotation='vertical', fontsize=14)
save_path = save_layout.build_path(entities, validate=False)
plt.savefig(save_path)
plt.show()

caption = f"**Edgewise variance evolution separated by caffeine consumption 2h before.**"
save_caption(caption, save_layout, entities)

Caffeine consumption 24h before¶

In [36]:

Copied!





# Entities specific to this plot
entities = {'visualization': 'resvar', 'confound': 'caffeine24h'}
entities.update(entities_base)

# Create figure with subplots for variance and edgewise residuals
fig, axes = plt.subplots(1, 2, figsize=(16, 7))
min_res_var = 1000
max_res_var = 0
for i, (caf, group) in enumerate(residuals_df.groupby('caffeine_24h')):
    plt.sca(axes[i])
    ses_id = group['session'].apply(lambda x: str(x).zfill(3)).values
    res = group.drop(columns=['session', 'day_of_week', 'time_of_day','pe_dir','caffeine_2h', 'caffeine_24h']).values
    min_res_var, max_res_var = plot_res_variance(res, groupedby=caf, min_res_var=min_res_var, max_res_var=max_res_var, ses_id=ses_id)

#Set same y-axis limits for all subplots
for ax in axes.flat:
    ax.set_ylim(min_res_var-0.0005, max_res_var+0.0005)

# Set x-ticks with 90 degree rotation for the first subplot
axes[1].set_xticklabels(axes[1].get_xticklabels(), rotation=90)
axes[0].set_title("1 cup of coffee")
axes[1].set_title("2 cups of coffee")

fig.text(0.5, 0.05, 'Session Index', ha='center', fontsize=14)
fig.text(0.05, 0.5, 'Variance of Residuals', va='center', rotation='vertical', fontsize=14)
save_path = save_layout.build_path(entities, validate=False)
plt.savefig(save_path)
plt.show()

caption = f"**Residual variance evolution separated by caffeine consumption 24h before.**"
save_caption(caption, save_layout, entities)
# Entities specific to this plot
entities = {'visualization': 'resvar', 'confound': 'caffeine24h'}
entities.update(entities_base)

# Create figure with subplots for variance and edgewise residuals
fig, axes = plt.subplots(1, 2, figsize=(16, 7))
min_res_var = 1000
max_res_var = 0
for i, (caf, group) in enumerate(residuals_df.groupby('caffeine_24h')):
    plt.sca(axes[i])
    ses_id = group['session'].apply(lambda x: str(x).zfill(3)).values
    res = group.drop(columns=['session', 'day_of_week', 'time_of_day','pe_dir','caffeine_2h', 'caffeine_24h']).values
    min_res_var, max_res_var = plot_res_variance(res, groupedby=caf, min_res_var=min_res_var, max_res_var=max_res_var, ses_id=ses_id)

#Set same y-axis limits for all subplots
for ax in axes.flat:
    ax.set_ylim(min_res_var-0.0005, max_res_var+0.0005)

# Set x-ticks with 90 degree rotation for the first subplot
axes[1].set_xticklabels(axes[1].get_xticklabels(), rotation=90)
axes[0].set_title("1 cup of coffee")
axes[1].set_title("2 cups of coffee")

fig.text(0.5, 0.05, 'Session Index', ha='center', fontsize=14)
fig.text(0.05, 0.5, 'Variance of Residuals', va='center', rotation='vertical', fontsize=14)
save_path = save_layout.build_path(entities, validate=False)
plt.savefig(save_path)
plt.show()

caption = f"**Residual variance evolution separated by caffeine consumption 24h before.**"
save_caption(caption, save_layout, entities)

Based on the interaction plot, coffee consumption seems to influence the residuals evolution, as such we will add it the OLS as a nuisance regressor.

In [37]:

Copied!





# Entities specific to this plot
entities = {'visualization': 'edgewisevar', 'confound': 'caffeine24h'}
entities.update(entities_base)

# Create figure with subplots for variance and edgewise residuals
fig, axes = plt.subplots(1, 2, figsize=(16, 7))
min_res_var = 1000
max_res_var = 0
for i, (caf, group) in enumerate(residuals_df.groupby('caffeine_24h')):
    plt.sca(axes[i])
    ses_id = group['session'].apply(lambda x: str(x).zfill(3)).values
    res = group.drop(columns=['session', 'day_of_week', 'time_of_day','pe_dir','caffeine_2h', 'caffeine_24h']).values
    plot_edgewise_residual(res, groupedby=caf, ses_id=ses_id)

# Set x-ticks with 90 degree rotation for the first subplot
axes[1].set_xticklabels(axes[1].get_xticklabels(), rotation=90)
axes[0].set_title("1 cup of coffee")
axes[1].set_title("2 cups of coffee")

fig.text(0.5, 0.05, 'Session Index', ha='center', fontsize=14)
fig.text(0.05, 0.5, 'Variance of Residuals', va='center', rotation='vertical', fontsize=14)
save_path = save_layout.build_path(entities, validate=False)
plt.savefig(save_path)
plt.show()

caption = f"**Edgewise variance evolution separated by caffeine consumption 24h before.**"
save_caption(caption, save_layout, entities)
# Entities specific to this plot
entities = {'visualization': 'edgewisevar', 'confound': 'caffeine24h'}
entities.update(entities_base)

# Create figure with subplots for variance and edgewise residuals
fig, axes = plt.subplots(1, 2, figsize=(16, 7))
min_res_var = 1000
max_res_var = 0
for i, (caf, group) in enumerate(residuals_df.groupby('caffeine_24h')):
    plt.sca(axes[i])
    ses_id = group['session'].apply(lambda x: str(x).zfill(3)).values
    res = group.drop(columns=['session', 'day_of_week', 'time_of_day','pe_dir','caffeine_2h', 'caffeine_24h']).values
    plot_edgewise_residual(res, groupedby=caf, ses_id=ses_id)

# Set x-ticks with 90 degree rotation for the first subplot
axes[1].set_xticklabels(axes[1].get_xticklabels(), rotation=90)
axes[0].set_title("1 cup of coffee")
axes[1].set_title("2 cups of coffee")

fig.text(0.5, 0.05, 'Session Index', ha='center', fontsize=14)
fig.text(0.05, 0.5, 'Variance of Residuals', va='center', rotation='vertical', fontsize=14)
save_path = save_layout.build_path(entities, validate=False)
plt.savefig(save_path)
plt.show()

caption = f"**Edgewise variance evolution separated by caffeine consumption 24h before.**"
save_caption(caption, save_layout, entities)

Do some confound explain why ses-043 is seemingly less correlated to the other sessions? Can we find a reason that justifies its exclusion from the fMRI analysis?¶

In [38]:

Copied!





confounds_path = '/data/datasets/hcph-dataset/phenotype/mood_env_quest.tsv'
confounds_df = pd.read_csv(confounds_path, sep='\t')
# Keep only the confounds from the reliability sessions
confounds_df = confounds_df[confounds_df['session_number'].astype(str).str.zfill(3).str.startswith('0')]
confounds_df['session_number'] = confounds_df['session_number'].astype(str).str.zfill(3)
confounds_df.head()
confounds_path = '/data/datasets/hcph-dataset/phenotype/mood_env_quest.tsv'
confounds_df = pd.read_csv(confounds_path, sep='\t')
# Keep only the confounds from the reliability sessions
confounds_df = confounds_df[confounds_df['session_number'].astype(str).str.zfill(3).str.startswith('0')]
confounds_df['session_number'] = confounds_df['session_number'].astype(str).str.zfill(3)
confounds_df.head()

Out[38]:

	participant_id	session_number	scanner	pe_dir	date	time	weather_temperature	weather_wind	weather_precipitation	weather_precipitation_type	...	slept_qct	slept_rest	slept_bht	rumination	anxiety	physical_pain	time_slept_t1w	time_slept_rest	time_slept_diffusion	details_rumination_anxiety_pain
37	1	048	Prisma	PA	11-14-2023	20:45	11-14	10.4	43.0	4 Rain or drizzle	...	No	No	No	3.0	6.0	6.0	NaN	NaN	NaN	NaN
38	1	047	Prisma	RL	11-14-2023	18:50	11-14	10.4	43.0	4 Rain or drizzle	...	No	No	No	3.0	6.0	5.0	NaN	NaN	NaN	NaN
39	1	046	Prisma	LR	11-13-2023	20:15	10-16	8.7	7.0	2 Scattered precipitation (liquid)	...	No	No	No	2.0	6.0	6.0	NaN	NaN	NaN	NaN
40	1	045	Prisma	LR	11-13-2023	18:30	10-16	8.7	7.0	2 Scattered precipitation (liquid)	...	No	No	No	3.0	6.0	6.0	NaN	NaN	NaN	NaN
41	1	044	Prisma	LR	11-12-2023	11:10	5-10	7.0	0.1	2 Scattered precipitation (liquid)	...	No	No	No	3.0	6.0	6.0	NaN	NaN	NaN	NaN

5 rows × 78 columns

In [39]:

Copied!

confound_to_plot = confounds_df.columns.drop(['participant_id','session_number', 'scanner', 'pe_dir', 'remove_jewelry', 'mr_room_helium', 'details_rumination_anxiety_pain', 'time_slept_t1w', 'time_slept_rest', 'time_slept_diffusion'])
confound_to_plot
confound_to_plot = confounds_df.columns.drop(['participant_id','session_number', 'scanner', 'pe_dir', 'remove_jewelry', 'mr_room_helium', 'details_rumination_anxiety_pain', 'time_slept_t1w', 'time_slept_rest', 'time_slept_diffusion'])
confound_to_plot

Out[39]:

Index(['date', 'time', 'weather_temperature', 'weather_wind',
       'weather_precipitation', 'weather_precipitation_type',
       'weather_pressure', 'weather_humidity', 'weather_daylight',
       'blood_pressure', 'caffeine_24h', 'caffeine_2h', 'alcohol', 'triptans',
       'nsaids', 'weight', 'general_health', 'general_stress',
       'ease_concentration', 'beh_work', 'beh_freetime', 'beh_sport',
       'beh_outdoors', 'beh_electronic_devices', 'beh_active_social',
       'beh_passive_social', 'sleep_quality', 'remember_dreams', 'day_dreams',
       'time_to_bed', 'time_in_bed', 'steps', 'distance_walked',
       'stories_climbed', 'calories', 'interested', 'distressed', 'excited',
       'upset', 'strong', 'guilty', 'scared', 'hostile', 'enthusiastic',
       'proud', 'irritable', 'alert', 'ashamed', 'inspired', 'nervous',
       'determined', 'attentive', 'jittery', 'active', 'afraid',
       'filled_all_entries', 'mr_room_temperature', 'mr_room_humidity',
       'mr_room_pressure', 'slept_session', 'slept_t1w', 'slept_diffusion',
       'slept_qct', 'slept_rest', 'slept_bht', 'rumination', 'anxiety',
       'physical_pain'],
      dtype='object')

In [ ]:

Copied!





# Convert 'time' column to datetime, round up to the closest full hour
confounds_df['time'] = pd.to_datetime(confounds_df['time'], format='%H:%M', errors='coerce').dt.round('h').dt.strftime('%H').astype(float)
slept_cols = [col for col in confounds_df.columns if col.startswith('slept')]
confounds_df[slept_cols] = confounds_df[slept_cols].replace({'Yes': 1, 'No': 0})
# Convert 'time' column to datetime, round up to the closest full hour
confounds_df['time'] = pd.to_datetime(confounds_df['time'], format='%H:%M', errors='coerce').dt.round('h').dt.strftime('%H').astype(float)
slept_cols = [col for col in confounds_df.columns if col.startswith('slept')]
confounds_df[slept_cols] = confounds_df[slept_cols].replace({'Yes': 1, 'No': 0})

Visualize the distribution of all confounds and highlight as a red dot the value corresponding to ses-043

In [42]:

Copied!





# Entities specific to this plot
entities = {'visualization': 'confounddist'}
entities.update(entities_base)

# Filter sessions with '043' in session_number (as string, zero-padded if needed)
highlight_mask = confounds_df['session_number'].astype(str).str.zfill(3).str.contains('043')

n_cols = 5
n_plots = len(confound_to_plot)
n_rows = int(np.ceil(n_plots / n_cols))

fig, axes = plt.subplots(n_rows, n_cols, figsize=(n_cols * 4, n_rows * 3+10))
axes = axes.flatten()

for idx, confound in enumerate(confound_to_plot):
    ax = axes[idx]
    y = confounds_df[confound]
    x = np.arange(len(y))
    ax.scatter(x, y, alpha=0.7)
    # Highlight session 043
    ax.scatter(x[highlight_mask], y[highlight_mask], color='red', label='Session 043')
    ax.set_title(confound)
    ax.grid(True)

# Hide unused subplots
for j in range(idx + 1, len(axes)):
    axes[j].set_visible(False)

#plt.tight_layout()
plt.show()

caption = f"**Confounds distribution.** To try to understand whether some confound can explain that Session 043 is seemingly less correlated" \
"to the other sessions, we plotted the distribution of all confounds and highlighted as a red dot the value corresponding to Session 043." \
"Not that it justifies exclusion of ses-043, but it is interesting to see that ses-043 is an outlier in different confound dimensions." \
"Notably, it was one of the only two sessions where the participant reported to be socially active for extended duration, to be outdoor," \
"away from electronic devices, to have more freetime and to have walked more distance and more number of steps than usual." \
"This session is also associated with the only time the sleep quality was recorded at the minimum."
save_caption(caption, save_layout, entities)
# Entities specific to this plot
entities = {'visualization': 'confounddist'}
entities.update(entities_base)

# Filter sessions with '043' in session_number (as string, zero-padded if needed)
highlight_mask = confounds_df['session_number'].astype(str).str.zfill(3).str.contains('043')

n_cols = 5
n_plots = len(confound_to_plot)
n_rows = int(np.ceil(n_plots / n_cols))

fig, axes = plt.subplots(n_rows, n_cols, figsize=(n_cols * 4, n_rows * 3+10))
axes = axes.flatten()

for idx, confound in enumerate(confound_to_plot):
    ax = axes[idx]
    y = confounds_df[confound]
    x = np.arange(len(y))
    ax.scatter(x, y, alpha=0.7)
    # Highlight session 043
    ax.scatter(x[highlight_mask], y[highlight_mask], color='red', label='Session 043')
    ax.set_title(confound)
    ax.grid(True)

# Hide unused subplots
for j in range(idx + 1, len(axes)):
    axes[j].set_visible(False)

#plt.tight_layout()
plt.show()

caption = f"**Confounds distribution.** To try to understand whether some confound can explain that Session 043 is seemingly less correlated" \
"to the other sessions, we plotted the distribution of all confounds and highlighted as a red dot the value corresponding to Session 043." \
"Not that it justifies exclusion of ses-043, but it is interesting to see that ses-043 is an outlier in different confound dimensions." \
"Notably, it was one of the only two sessions where the participant reported to be socially active for extended duration, to be outdoor," \
"away from electronic devices, to have more freetime and to have walked more distance and more number of steps than usual." \
"This session is also associated with the only time the sleep quality was recorded at the minimum."
save_caption(caption, save_layout, entities)

Not that it justifies exclusion of ses-043, but it is interesting to see that ses-043 is an outlier in different confound dimensions. Notably, it was one of the only two sessions where the participant reported to be socially active for extended duration, to be outdoor, away from electronic devices, to have more freetime and to have walked more distance and more number of steps than usual. This session is also associated with the only time the sleep quality was recorded at the minimum.