Does replaying the same movie clip reduce the variability of resting-state fMRI?¶
The interactive version of this notebook is available at https://github.com/TheAxonLab/hcph-sops/tree/mkdocs/docs/analysis.
In [1]:
Copied!
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
import nibabel as nib
from pathlib import Path
from scipy import stats
from statsmodels.stats.multitest import fdrcorrection
from confounds import get_confounds
from load_save import save_caption, load_matrices, init_save_layout
# Define filtering parameters
metric = "correlation"
atlas_dimension = 128
fd_threshold = 0.5
# FD threshold with the decimal point stripped, for use in BIDS-style filenames
fdthresh_str = str(fd_threshold).replace(".", "")

# Define paths (fc_path was an f-string with no placeholders; plain string suffices)
code_path = Path("/data/code/hcph-sops/")
fc_path = Path("/data/derivatives/hcph-fc/")
dataset_path = Path("/data/datasets/hcph-dataset")
suppl_path = Path("/data/derivatives/hcph-suppl/")
confound_path = Path("/data/datasets/hcph-dataset/phenotype/mood_env_quest.tsv")
iqms_path = Path("/data/derivatives/hcph-mriqc/group_bold.tsv")
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
import nibabel as nib
from pathlib import Path
from scipy import stats
from statsmodels.stats.multitest import fdrcorrection
from confounds import get_confounds
from load_save import save_caption, load_matrices, init_save_layout
# Define filtering parameters
metric = "correlation"
atlas_dimension = 128
fd_threshold = 0.5
# FD threshold with the decimal point stripped, for use in BIDS-style filenames
fdthresh_str = str(fd_threshold).replace(".", "")

# Define paths (fc_path was an f-string with no placeholders; plain string suffices)
code_path = Path("/data/code/hcph-sops/")
fc_path = Path("/data/derivatives/hcph-fc/")
dataset_path = Path("/data/datasets/hcph-dataset")
suppl_path = Path("/data/derivatives/hcph-suppl/")
confound_path = Path("/data/datasets/hcph-dataset/phenotype/mood_env_quest.tsv")
iqms_path = Path("/data/derivatives/hcph-mriqc/group_bold.tsv")
Load the functional connectivity matrices¶
In [2]:
Copied!
# Set up entities_base for the matrix loading
entities_base = {
    'subject': '001',
    'task': 'rsmovie',
    'measure': metric,
    'scale': str(atlas_dimension),
    'fd_threshold': fdthresh_str,
}

# Load the FC matrices and associated information using the helper function
loaded = load_matrices(fc_path, entities_base, code_path)
(fc_concat, fc_matrix, files, atlas_labels,
 region_labels, fc_size, ses_index) = (
    loaded[key]
    for key in (
        "conn_concat", "conn_matrix", "files", "atlas_labels",
        "region_labels", "conn_size", "ses_index",
    )
)

# Initialize the BIDSLayout used to save the figures and their associated captions
save_layout = init_save_layout(code_path, suppl_path)

# Plot an example FC matrix (the last one loaded): rebuild the full symmetric
# matrix from its vectorized upper triangle (diagonal included) for visualization.
fc_matrix = np.zeros((fc_size, fc_size))
fc_matrix[np.triu_indices(fc_size, k=0)] = fc_concat[-1].flatten()
# Mirror the strict upper triangle below the diagonal (diagonal stays single-counted)
fc_matrix += np.triu(fc_matrix, k=1).T

plt.figure(figsize=(8, 6))
sns.heatmap(fc_matrix, cmap='viridis')
plt.title("Example FC matrix")
plt.xlabel('Brain Regions')
plt.ylabel('Brain Regions')

print(f"Number of brain regions (matrix size): {fc_size}")
print(f"Shape of concatenated upper triangle FC matrices (number of sessions, number of region pairs): {fc_concat.shape}")

# Sanity check: 36 sessions, each with an upper triangle (incl. diagonal) of pairs
assert fc_concat.shape == (36, fc_size * (fc_size + 1) // 2)
# Set up entities_base for the matrix loading
entities_base = {
    'subject': '001',
    'task': 'rsmovie',
    'measure': metric,
    'scale': str(atlas_dimension),
    'fd_threshold': fdthresh_str,
}

# Load the FC matrices and associated information using the helper function
loaded = load_matrices(fc_path, entities_base, code_path)
(fc_concat, fc_matrix, files, atlas_labels,
 region_labels, fc_size, ses_index) = (
    loaded[key]
    for key in (
        "conn_concat", "conn_matrix", "files", "atlas_labels",
        "region_labels", "conn_size", "ses_index",
    )
)

# Initialize the BIDSLayout used to save the figures and their associated captions
save_layout = init_save_layout(code_path, suppl_path)

# Plot an example FC matrix (the last one loaded): rebuild the full symmetric
# matrix from its vectorized upper triangle (diagonal included) for visualization.
fc_matrix = np.zeros((fc_size, fc_size))
fc_matrix[np.triu_indices(fc_size, k=0)] = fc_concat[-1].flatten()
# Mirror the strict upper triangle below the diagonal (diagonal stays single-counted)
fc_matrix += np.triu(fc_matrix, k=1).T

plt.figure(figsize=(8, 6))
sns.heatmap(fc_matrix, cmap='viridis')
plt.title("Example FC matrix")
plt.xlabel('Brain Regions')
plt.ylabel('Brain Regions')

print(f"Number of brain regions (matrix size): {fc_size}")
print(f"Shape of concatenated upper triangle FC matrices (number of sessions, number of region pairs): {fc_concat.shape}")

# Sanity check: 36 sessions, each with an upper triangle (incl. diagonal) of pairs
assert fc_concat.shape == (36, fc_size * (fc_size + 1) // 2)
[get_dataset_dir] Dataset found in /home/cprovins/nilearn_data/difumo_atlases Configuration 'hcph' already exists, skipping add_config_paths. Number of brain regions (matrix size): 119 Shape of concatenated upper triangle FC matrices (number of sessions, number of region pairs): (36, 7140)
One remark to note is that the number of brain regions does not correspond to `atlas_dimension`, because we removed from the DiFuMo atlas the components that are specific to the CSF, the ventricles, and the sinuses.
In [3]:
Copied!
# Add the information that applies to all figures generated below to the base entities
entities_base["experiment"] = "fmrivar"
entities_base["extension"] = ".svg"
entities_base["datatype"] = "figures"
entities_base["suffix"] = "bold"
# Add the information that applies to all figures generated below to the base entities
entities_base["experiment"] = "fmrivar"
entities_base["extension"] = ".svg"
entities_base["datatype"] = "figures"
entities_base["suffix"] = "bold"
In [4]:
Copied!
# Entities specific to this plot
entities = {'visualization': 'similmat'}
entities.update(entities_base)

# Pearson correlation between the vectorized FC matrices of every pair of sessions.
# fc_concat is (sessions x connections); with .T and rowvar=False the 36 sessions
# become the variables, so the result is a (36, 36) session-similarity matrix.
similarity_matrix = np.corrcoef(fc_concat.T, rowvar=False)
print(f"Shape of the similarity matrix (number of sessions x number of sessions): {similarity_matrix.shape}")

# NOTE: this mean includes the unit diagonal, which slightly inflates the value.
mean_corr = np.mean(similarity_matrix)
print(f"Overall mean correlation: {mean_corr}")

plt.figure(figsize=(10, 8))
sns.heatmap(similarity_matrix, cmap='viridis')
plt.xticks(ticks=np.arange(len(ses_index))+0.5, labels=ses_index, rotation=90)
plt.yticks(ticks=np.arange(len(ses_index))+0.5, labels=ses_index)
plt.xlabel('Session Index', fontsize=14)
plt.ylabel('Session Index', fontsize=14)
save_path = save_layout.build_path(entities, validate=False)
plt.savefig(save_path)
plt.show()

# Fixed: the adjacent string fragments previously joined without separating
# spaces, producing a garbled caption ("session.**The overall ... 0.86.The fact ...").
caption = (
    "**Similarity matrix presenting the Pearson correlation between FC matrices for each session.** "
    f"The overall mean correlation is {mean_corr:.2f}. "
    "The fact that the FC matrices are highly correlated probably explains why only two components are enough to explain the vast majority of the variance. "
    "It is also visible that session index 30 is less correlated with the other sessions compared to the other pair-wise comparisons."
)
save_caption(caption, save_layout, entities)
# Entities specific to this plot
entities = {'visualization': 'similmat'}
entities.update(entities_base)

# Pearson correlation between the vectorized FC matrices of every pair of sessions.
# fc_concat is (sessions x connections); with .T and rowvar=False the 36 sessions
# become the variables, so the result is a (36, 36) session-similarity matrix.
similarity_matrix = np.corrcoef(fc_concat.T, rowvar=False)
print(f"Shape of the similarity matrix (number of sessions x number of sessions): {similarity_matrix.shape}")

# NOTE: this mean includes the unit diagonal, which slightly inflates the value.
mean_corr = np.mean(similarity_matrix)
print(f"Overall mean correlation: {mean_corr}")

plt.figure(figsize=(10, 8))
sns.heatmap(similarity_matrix, cmap='viridis')
plt.xticks(ticks=np.arange(len(ses_index))+0.5, labels=ses_index, rotation=90)
plt.yticks(ticks=np.arange(len(ses_index))+0.5, labels=ses_index)
plt.xlabel('Session Index', fontsize=14)
plt.ylabel('Session Index', fontsize=14)
save_path = save_layout.build_path(entities, validate=False)
plt.savefig(save_path)
plt.show()

# Fixed: the adjacent string fragments previously joined without separating
# spaces, producing a garbled caption ("session.**The overall ... 0.86.The fact ...").
caption = (
    "**Similarity matrix presenting the Pearson correlation between FC matrices for each session.** "
    f"The overall mean correlation is {mean_corr:.2f}. "
    "The fact that the FC matrices are highly correlated probably explains why only two components are enough to explain the vast majority of the variance. "
    "It is also visible that session index 30 is less correlated with the other sessions compared to the other pair-wise comparisons."
)
save_caption(caption, save_layout, entities)
Shape of the similarity matrix (number of sessions x number of sessions): (36, 36) Overall mean correlation: 0.8581150373039423
Run PCA to estimate the residual variance for each rsfMRI run¶
Choose number of principal components¶
We are using the elbow plot to decide the number of principal components to keep.
In [5]:
Copied!
from sklearn.decomposition import PCA

# Entities specific to this plot
entities = {'visualization': 'explainedvar'}
entities.update(entities_base)

# Center the data by removing the mean of each connection.
# NOTE: this reassigns fc_concat in the notebook namespace; centering is
# idempotent, so re-running the cell does not change the result.
fc_concat = fc_concat - np.mean(fc_concat, axis=0)
# Standardize the data to have unit variance
#fc_concat = fc_concat / np.std(fc_concat, axis=0)

# Run PCA
pca = PCA()
pca.fit(fc_concat)

# Extract the explained variance
explained_variance = pca.explained_variance_ratio_

# Generate the elbow plot
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(range(1, len(explained_variance) + 1), explained_variance, marker='o', linestyle='-')
ax.set_xlabel('Number of Components', fontsize=14)
ax.set_ylabel('Explained Variance Ratio', fontsize=14)
ax.tick_params(axis='x', labelsize=12)
ax.tick_params(axis='y', labelsize=12)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

# Define the number of components to keep
n_comp = 6
# Highlight the point corresponding to the number of components we keep in red
ax.plot(n_comp, explained_variance[n_comp-1], marker='o', markersize=8, color='red')

save_path = save_layout.build_path(entities, validate=False)
plt.savefig(save_path)
plt.show()

# Fixed: add the missing space after the bolded caption title
# (previously "components.**The point ...").
caption = (
    "**Elbow plot to choose the number of principal components.** "
    "The point in red indicates the number of components selected."
)
save_caption(caption, save_layout, entities)
from sklearn.decomposition import PCA

# Entities specific to this plot
entities = {'visualization': 'explainedvar'}
entities.update(entities_base)

# Center the data by removing the mean of each connection.
# NOTE: this reassigns fc_concat in the notebook namespace; centering is
# idempotent, so re-running the cell does not change the result.
fc_concat = fc_concat - np.mean(fc_concat, axis=0)
# Standardize the data to have unit variance
#fc_concat = fc_concat / np.std(fc_concat, axis=0)

# Run PCA
pca = PCA()
pca.fit(fc_concat)

# Extract the explained variance
explained_variance = pca.explained_variance_ratio_

# Generate the elbow plot
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(range(1, len(explained_variance) + 1), explained_variance, marker='o', linestyle='-')
ax.set_xlabel('Number of Components', fontsize=14)
ax.set_ylabel('Explained Variance Ratio', fontsize=14)
ax.tick_params(axis='x', labelsize=12)
ax.tick_params(axis='y', labelsize=12)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

# Define the number of components to keep
n_comp = 6
# Highlight the point corresponding to the number of components we keep in red
ax.plot(n_comp, explained_variance[n_comp-1], marker='o', markersize=8, color='red')

save_path = save_layout.build_path(entities, validate=False)
plt.savefig(save_path)
plt.show()

# Fixed: add the missing space after the bolded caption title
# (previously "components.**The point ...").
caption = (
    "**Elbow plot to choose the number of principal components.** "
    "The point in red indicates the number of components selected."
)
save_caption(caption, save_layout, entities)
Plot the principal components¶
In [6]:
Copied!
# Function to reconstruct the full matrix from the upper triangle
def reconstruct_full_matrix(upper_triangle, size):
    """Rebuild a symmetric (size x size) matrix from its flattened upper triangle.

    Parameters
    ----------
    upper_triangle : array-like, length size * (size + 1) // 2
        Upper-triangular values (diagonal included), in row-major order.
    size : int
        Number of rows/columns of the output matrix.

    Returns
    -------
    numpy.ndarray
        The reconstructed symmetric matrix.
    """
    sym = np.zeros((size, size))
    rows, cols = np.triu_indices(size)
    # Write the values into the upper triangle, then mirror them into the
    # lower triangle; diagonal entries are simply overwritten with themselves.
    sym[rows, cols] = upper_triangle
    sym[cols, rows] = upper_triangle
    return sym
# Reorder regions by network so heatmap rows/columns group Yeo-7 networks together
network_order = atlas_labels['yeo_networks7'].argsort()
reordered_regions_labels = atlas_labels.iloc[network_order]['difumo_names']

# One large annotated heatmap per retained principal component.
# NOTE(review): indentation reconstructed from the notebook export — confirm the
# color-bar patches and legend are intended to be drawn once per component figure.
for i, pc in enumerate(pca.components_[:n_comp, :]):
    # Reconstruct the full matrix from the upper triangle
    full_matrix = reconstruct_full_matrix(pc, fc_size)
    # Cluster regions by network
    full_matrix = full_matrix[network_order, :][:, network_order]
    # Plot the principal component as a heatmap
    plt.figure(figsize=(20, 18))
    sns.heatmap(full_matrix, cmap='viridis')
    plt.title(f'Principal Component {i+1}')
    plt.xlabel('Brain Regions')
    plt.ylabel('Brain Regions')
    # Adjust y-ticks to move them further away from the heatmap
    plt.yticks(ticks=np.arange(len(reordered_regions_labels)) + 0.5, labels=reordered_regions_labels, va='center', rotation=0)
    plt.gca().tick_params(axis='y', pad=15)  # Move y-ticks further away
    plt.gca().tick_params(axis='x', pad=15)  # Move x-ticks further away
    # Add a color bar on the left and bottom ledge indicating the network
    network_colors = sns.color_palette("tab20", n_colors=7)  # Define 7 colors for the networks
    network_segments = atlas_labels.iloc[network_order]['yeo_networks7'].astype('category').cat.codes
    # Add color bar on the left with increased thickness
    for idx, color in enumerate(network_segments):
        plt.gca().add_patch(plt.Rectangle((-1.5, idx), 1.5, 1, color=network_colors[color], transform=plt.gca().transData, clip_on=False))
    # Add color bar on the bottom with increased thickness
    for idx, color in enumerate(network_segments):
        plt.gca().add_patch(plt.Rectangle((idx, full_matrix.shape[0]), 1, 1.5, color=network_colors[color], transform=plt.gca().transData, clip_on=False))
    # Add legend for the networks
    network_labels = atlas_labels['yeo_networks7'].astype('category').cat.categories
    legend_patches = [plt.Line2D([0], [0], color=network_colors[i], lw=4, label=label) for i, label in enumerate(network_labels)]
    plt.legend(handles=legend_patches, loc='upper right', bbox_to_anchor=(1.37, 1), title="Networks", fontsize=12, title_fontsize=14)

# Plot all principal components as heatmaps in a single figure with subplots
# (network_segments / network_colors / legend_patches reuse the last values set
# in the loop above).
n_rows = int(np.ceil(n_comp / 3))
n_cols = 3
fig, axes = plt.subplots(n_rows, n_cols, figsize=(n_cols * 5, n_rows * 4))
for i, pc in enumerate(pca.components_[:n_comp, :]):
    full_matrix = reconstruct_full_matrix(pc, fc_size)
    # Cluster regions by network
    full_matrix = full_matrix[network_order, :][:, network_order]
    ax = axes[i // n_cols, i % n_cols]
    sns.heatmap(full_matrix, cmap='viridis', ax=ax, cbar_kws={'shrink': 0.5})
    ax.set_title(f'Principal Component {i+1}')
    ax.set_xlabel('Brain Regions', labelpad=20)
    ax.set_ylabel('Brain Regions', labelpad=20)
    ax.set_xticks([])  # Hide x-ticks
    ax.set_yticks([])  # Hide y-ticks
    # Add color bar on the left with increased thickness
    for idx, color in enumerate(network_segments):
        ax.add_patch(plt.Rectangle((-3, idx), 3, 1, color=network_colors[color], transform=ax.transData, clip_on=False))
    # Add color bar on the bottom with increased thickness
    for idx, color in enumerate(network_segments):
        ax.add_patch(plt.Rectangle((idx, full_matrix.shape[0]), 1, 3, color=network_colors[color], transform=ax.transData, clip_on=False))

# Add a single legend for the networks on the right of the figure
fig.legend(
    handles=legend_patches,
    loc='center right',
    bbox_to_anchor=(1.1, 0.5),  # Position the legend outside the figure, vertically centered
    title="Networks",
    fontsize=10,
    title_fontsize=12
)

# Hide any unused subplots (i keeps its final value from the loop above)
for j in range(i + 1, n_rows * n_cols):
    axes[j // n_cols, j % n_cols].axis('off')

plt.tight_layout()
plt.show()
# Function to reconstruct the full matrix from the upper triangle
def reconstruct_full_matrix(upper_triangle, size):
    """Rebuild a symmetric (size x size) matrix from its flattened upper triangle.

    Parameters
    ----------
    upper_triangle : array-like, length size * (size + 1) // 2
        Upper-triangular values (diagonal included), in row-major order.
    size : int
        Number of rows/columns of the output matrix.

    Returns
    -------
    numpy.ndarray
        The reconstructed symmetric matrix.
    """
    sym = np.zeros((size, size))
    rows, cols = np.triu_indices(size)
    # Write the values into the upper triangle, then mirror them into the
    # lower triangle; diagonal entries are simply overwritten with themselves.
    sym[rows, cols] = upper_triangle
    sym[cols, rows] = upper_triangle
    return sym
# Reorder regions by network so heatmap rows/columns group Yeo-7 networks together
network_order = atlas_labels['yeo_networks7'].argsort()
reordered_regions_labels = atlas_labels.iloc[network_order]['difumo_names']

# One large annotated heatmap per retained principal component.
# NOTE(review): indentation reconstructed from the notebook export — confirm the
# color-bar patches and legend are intended to be drawn once per component figure.
for i, pc in enumerate(pca.components_[:n_comp, :]):
    # Reconstruct the full matrix from the upper triangle
    full_matrix = reconstruct_full_matrix(pc, fc_size)
    # Cluster regions by network
    full_matrix = full_matrix[network_order, :][:, network_order]
    # Plot the principal component as a heatmap
    plt.figure(figsize=(20, 18))
    sns.heatmap(full_matrix, cmap='viridis')
    plt.title(f'Principal Component {i+1}')
    plt.xlabel('Brain Regions')
    plt.ylabel('Brain Regions')
    # Adjust y-ticks to move them further away from the heatmap
    plt.yticks(ticks=np.arange(len(reordered_regions_labels)) + 0.5, labels=reordered_regions_labels, va='center', rotation=0)
    plt.gca().tick_params(axis='y', pad=15)  # Move y-ticks further away
    plt.gca().tick_params(axis='x', pad=15)  # Move x-ticks further away
    # Add a color bar on the left and bottom ledge indicating the network
    network_colors = sns.color_palette("tab20", n_colors=7)  # Define 7 colors for the networks
    network_segments = atlas_labels.iloc[network_order]['yeo_networks7'].astype('category').cat.codes
    # Add color bar on the left with increased thickness
    for idx, color in enumerate(network_segments):
        plt.gca().add_patch(plt.Rectangle((-1.5, idx), 1.5, 1, color=network_colors[color], transform=plt.gca().transData, clip_on=False))
    # Add color bar on the bottom with increased thickness
    for idx, color in enumerate(network_segments):
        plt.gca().add_patch(plt.Rectangle((idx, full_matrix.shape[0]), 1, 1.5, color=network_colors[color], transform=plt.gca().transData, clip_on=False))
    # Add legend for the networks
    network_labels = atlas_labels['yeo_networks7'].astype('category').cat.categories
    legend_patches = [plt.Line2D([0], [0], color=network_colors[i], lw=4, label=label) for i, label in enumerate(network_labels)]
    plt.legend(handles=legend_patches, loc='upper right', bbox_to_anchor=(1.37, 1), title="Networks", fontsize=12, title_fontsize=14)

# Plot all principal components as heatmaps in a single figure with subplots
# (network_segments / network_colors / legend_patches reuse the last values set
# in the loop above).
n_rows = int(np.ceil(n_comp / 3))
n_cols = 3
fig, axes = plt.subplots(n_rows, n_cols, figsize=(n_cols * 5, n_rows * 4))
for i, pc in enumerate(pca.components_[:n_comp, :]):
    full_matrix = reconstruct_full_matrix(pc, fc_size)
    # Cluster regions by network
    full_matrix = full_matrix[network_order, :][:, network_order]
    ax = axes[i // n_cols, i % n_cols]
    sns.heatmap(full_matrix, cmap='viridis', ax=ax, cbar_kws={'shrink': 0.5})
    ax.set_title(f'Principal Component {i+1}')
    ax.set_xlabel('Brain Regions', labelpad=20)
    ax.set_ylabel('Brain Regions', labelpad=20)
    ax.set_xticks([])  # Hide x-ticks
    ax.set_yticks([])  # Hide y-ticks
    # Add color bar on the left with increased thickness
    for idx, color in enumerate(network_segments):
        ax.add_patch(plt.Rectangle((-3, idx), 3, 1, color=network_colors[color], transform=ax.transData, clip_on=False))
    # Add color bar on the bottom with increased thickness
    for idx, color in enumerate(network_segments):
        ax.add_patch(plt.Rectangle((idx, full_matrix.shape[0]), 1, 3, color=network_colors[color], transform=ax.transData, clip_on=False))

# Add a single legend for the networks on the right of the figure
fig.legend(
    handles=legend_patches,
    loc='center right',
    bbox_to_anchor=(1.1, 0.5),  # Position the legend outside the figure, vertically centered
    title="Networks",
    fontsize=10,
    title_fontsize=12
)

# Hide any unused subplots (i keeps its final value from the loop above)
for j in range(i + 1, n_rows * n_cols):
    axes[j // n_cols, j % n_cols].axis('off')

plt.tight_layout()
plt.show()