Calibrating a regressor to flag HCPh T1w images for exclusion
The interactive version of this notebook is available at https://github.com/TheAxonLab/hcph-sops/tree/mkdocs/docs/analysis/qc-regressor-hcph.ipynb
import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedKFold
from sklearn.model_selection import GridSearchCV
from mriqc_learn.datasets import load_data
from mriqc_learn.models import preprocess as pp
from mriqc_learn.models.production import init_pipeline_xgboost, init_pipeline_naive
Load HCPh data¶
iqms_path = Path('/home/cprovins/data/hcph-derivatives-mriqc/group_T1w.tsv')
manual_ratings_path = Path('/home/cprovins/data/hcph-derivatives-mriqc/desc-ratings_T1w.tsv')
df_iqms = pd.read_csv(iqms_path, sep='\t')
df_manual_ratings = pd.read_csv(manual_ratings_path, sep='\t')
# Merge the two dataframes based on the filename
# Cannot handle two raters for now!
df = pd.merge(df_manual_ratings, df_iqms, left_on='subject', right_on='bids_name', how='inner')
assert all(df['subject'] == df['bids_name'])
# Move the column bids_name to the first column
df = df[['bids_name'] + [col for col in df.columns if col != 'bids_name']]
df.drop(columns=['subject'], inplace=True)
df
| | bids_name | rater_id | dataset | rating | artifacts | time_sec | confidence | comments | cjv | cnr | ... | summary_wm_mean | summary_wm_median | summary_wm_n | summary_wm_p05 | summary_wm_p95 | summary_wm_stdv | tpm_overlap_csf | tpm_overlap_gm | tpm_overlap_wm | wm2max |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | sub-001_ses-excl027_acq-undistorted_T1w | celine | mriqc-23.2.0-withoutdwi | 3.55 | ["eye-spillover","noise-local"] | 209.948 | 2.50 | local noise close to the ventricles | 0.604348 | 1.606833 | ... | 934.354938 | 955.617892 | 1.015999e+06 | 747.667651 | 1056.077599 | 94.405582 | 0.182043 | 0.522926 | 0.545003 | 0.477761 |
1 | sub-001_ses-excl027_acq-undistorted_T1w | celine | mriqc-23.2.0-withoutdwi | 3.60 | [] | 76.032 | 3.50 | NaN | 0.604348 | 1.606833 | ... | 934.354938 | 955.617892 | 1.015999e+06 | 747.667651 | 1056.077599 | 94.405582 | 0.182043 | 0.522926 | 0.545003 | 0.477761 |
2 | sub-001_ses-excl029_acq-undistorted_T1w | celine | mriqc-23.2.0-withoutdwi | 3.45 | ["eye-spillover","inu"] | 113.066 | 3.50 | light INU | 0.596304 | 1.650305 | ... | 937.143691 | 957.493211 | 1.013283e+06 | 754.115148 | 1055.926475 | 92.421504 | 0.181235 | 0.523413 | 0.544391 | 0.491119 |
3 | sub-001_ses-excl029_acq-undistorted_T1w | celine | mriqc-23.2.0-withoutdwi | 3.55 | ["eye-spillover"] | 60.220 | 3.50 | NaN | 0.596304 | 1.650305 | ... | 937.143691 | 957.493211 | 1.013283e+06 | 754.115148 | 1055.926475 | 92.421504 | 0.181235 | 0.523413 | 0.544391 | 0.491119 |
4 | sub-001_ses-pilot001_acq-morphobox_T1w | celine | mriqc-23.2.0-withoutdwi | 1.20 | ["ghost-aliasing"] | 84.348 | 4.00 | low resolution | 0.589266 | 1.757282 | ... | 928.132422 | 952.478942 | 4.765519e+05 | 736.106579 | 1048.748755 | 96.711931 | 0.207131 | 0.542773 | 0.559154 | 0.524738 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
79 | sub-001_ses-045_acq-undistorted_T1w | celine | mriqc-23.2.0-withoutdwi | 2.45 | ["head-motion","eye-spillover"] | 133.657 | 3.05 | HM ringing visible in T1w lower frontal lobe | 0.604583 | 1.588400 | ... | 931.165777 | 954.003248 | 1.029516e+06 | 739.441554 | 1053.858769 | 96.443148 | 0.183835 | 0.526239 | 0.543494 | 0.484465 |
80 | sub-001_ses-046_acq-undistorted_T1w | celine | mriqc-23.2.0-withoutdwi | 3.05 | ["head-motion","eye-spillover"] | 125.347 | 3.45 | subtle HM ringing | 0.613529 | 1.589260 | ... | 933.199076 | 954.786639 | 1.010457e+06 | 748.510799 | 1053.416242 | 93.550235 | 0.177092 | 0.517983 | 0.542767 | 0.493663 |
81 | sub-001_ses-047_acq-undistorted_run-1_T1w | celine | mriqc-23.2.0-withoutdwi | 1.65 | ["eye-spillover","uncategorized"] | 88.320 | 3.80 | low contrast | 0.650858 | 1.448968 | ... | 931.741325 | 950.959938 | 1.042026e+06 | 746.521985 | 1063.578192 | 96.576782 | 0.181927 | 0.520435 | 0.539815 | 0.295455 |
82 | sub-001_ses-047_acq-undistorted_run-2_T1w | celine | mriqc-23.2.0-withoutdwi | 1.60 | ["eye-spillover","inu","uncategorized"] | 58.408 | 3.65 | low contrast | 0.661949 | 1.289697 | ... | 930.195692 | 944.551342 | 1.042298e+06 | 749.759529 | 1067.060620 | 95.413866 | 0.188724 | 0.522034 | 0.537868 | 0.282123 |
83 | sub-001_ses-047_acq-undistorted_run-3_T1w | celine | mriqc-23.2.0-withoutdwi | 1.65 | ["eye-spillover","inu","uncategorized"] | 53.255 | 3.70 | low contrast | 0.659533 | 1.348779 | ... | 932.941535 | 947.395483 | 1.047439e+06 | 755.562688 | 1065.594837 | 93.783535 | 0.191076 | 0.525830 | 0.541656 | 0.281388 |
84 rows × 76 columns
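The merge above assumes a single rater per image, hence the "cannot handle two raters" caveat. If the ratings file ever contained several raters, one possible workaround, sketched below as an assumption rather than part of the current protocol, would be to average the ratings per image before merging:
# Hypothetical sketch: collapse multiple raters into a single rating per image
# by averaging; the aggregation rule is an assumption, not the protocol used above.
df_ratings_avg = df_manual_ratings.groupby('subject', as_index=False).agg(
    rating=('rating', 'mean'),
    n_raters=('rating', 'size'),
)
df_multi = pd.merge(df_ratings_avg, df_iqms, left_on='subject', right_on='bids_name', how='inner')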
# Load the dataframe into mriqc-learn's (features, targets) format;
# with split_strategy="none", all records are kept in the training set
(train_x, train_y), (_, _) = load_data(df, seed=2978, split_strategy="none")
train_y
| | bids_name | rater_id | dataset | rating | artifacts | time_sec | confidence | comments |
---|---|---|---|---|---|---|---|---|
0 | sub-001_ses-excl027_acq-undistorted_T1w | celine | mriqc-23.2.0-withoutdwi | 3.55 | ["eye-spillover","noise-local"] | 209.948 | 2.50 | local noise close to the ventricles |
1 | sub-001_ses-excl027_acq-undistorted_T1w | celine | mriqc-23.2.0-withoutdwi | 3.60 | [] | 76.032 | 3.50 | NaN |
2 | sub-001_ses-excl029_acq-undistorted_T1w | celine | mriqc-23.2.0-withoutdwi | 3.45 | ["eye-spillover","inu"] | 113.066 | 3.50 | light INU |
3 | sub-001_ses-excl029_acq-undistorted_T1w | celine | mriqc-23.2.0-withoutdwi | 3.55 | ["eye-spillover"] | 60.220 | 3.50 | NaN |
4 | sub-001_ses-pilot001_acq-morphobox_T1w | celine | mriqc-23.2.0-withoutdwi | 1.20 | ["ghost-aliasing"] | 84.348 | 4.00 | low resolution |
... | ... | ... | ... | ... | ... | ... | ... | ... |
79 | sub-001_ses-045_acq-undistorted_T1w | celine | mriqc-23.2.0-withoutdwi | 2.45 | ["head-motion","eye-spillover"] | 133.657 | 3.05 | HM ringing visible in T1w lower frontal lobe |
80 | sub-001_ses-046_acq-undistorted_T1w | celine | mriqc-23.2.0-withoutdwi | 3.05 | ["head-motion","eye-spillover"] | 125.347 | 3.45 | subtle HM ringing |
81 | sub-001_ses-047_acq-undistorted_run-1_T1w | celine | mriqc-23.2.0-withoutdwi | 1.65 | ["eye-spillover","uncategorized"] | 88.320 | 3.80 | low contrast |
82 | sub-001_ses-047_acq-undistorted_run-2_T1w | celine | mriqc-23.2.0-withoutdwi | 1.60 | ["eye-spillover","inu","uncategorized"] | 58.408 | 3.65 | low contrast |
83 | sub-001_ses-047_acq-undistorted_run-3_T1w | celine | mriqc-23.2.0-withoutdwi | 1.65 | ["eye-spillover","inu","uncategorized"] | 53.255 | 3.70 | low contrast |
84 rows × 8 columns
The MRIQC rating widget records several fields (rater, timing, confidence, free-text comments), but only the numerical rating is of interest as the regression target.
# Keep only the rating
train_y = np.array(train_y['rating']).reshape(-1, 1)
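To put the error metrics reported below into context, it helps to first look at the spread of the target ratings, for instance with a quick summary (a small addition to the original notebook):
# Summary statistics of the regression target
pd.DataFrame(train_y, columns=['rating']).describe()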
Let's also print out a view of the IQMs table that will serve as features:
train_x
| | cjv | cnr | efc | fber | fwhm_avg | fwhm_x | fwhm_y | fwhm_z | icvs_csf | icvs_gm | ... | summary_wm_mean | summary_wm_median | summary_wm_n | summary_wm_p05 | summary_wm_p95 | summary_wm_stdv | tpm_overlap_csf | tpm_overlap_gm | tpm_overlap_wm | wm2max |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.604348 | 1.606833 | 0.536305 | 10137.548416 | 4.688191 | 4.331934 | 5.241601 | 4.491037 | 0.275428 | 0.378461 | ... | 934.354938 | 955.617892 | 1.015999e+06 | 747.667651 | 1056.077599 | 94.405582 | 0.182043 | 0.522926 | 0.545003 | 0.477761 |
1 | 0.604348 | 1.606833 | 0.536305 | 10137.548416 | 4.688191 | 4.331934 | 5.241601 | 4.491037 | 0.275428 | 0.378461 | ... | 934.354938 | 955.617892 | 1.015999e+06 | 747.667651 | 1056.077599 | 94.405582 | 0.182043 | 0.522926 | 0.545003 | 0.477761 |
2 | 0.596304 | 1.650305 | 0.530764 | 14122.389990 | 4.688262 | 4.344506 | 5.236868 | 4.483412 | 0.275655 | 0.379307 | ... | 937.143691 | 957.493211 | 1.013283e+06 | 754.115148 | 1055.926475 | 92.421504 | 0.181235 | 0.523413 | 0.544391 | 0.491119 |
3 | 0.596304 | 1.650305 | 0.530764 | 14122.389990 | 4.688262 | 4.344506 | 5.236868 | 4.483412 | 0.275655 | 0.379307 | ... | 937.143691 | 957.493211 | 1.013283e+06 | 754.115148 | 1055.926475 | 92.421504 | 0.181235 | 0.523413 | 0.544391 | 0.491119 |
4 | 0.589266 | 1.757282 | 0.513590 | -1.000000 | 3.978445 | 3.840736 | 4.297390 | 3.797210 | 0.273247 | 0.380469 | ... | 928.132422 | 952.478942 | 4.765519e+05 | 736.106579 | 1048.748755 | 96.711931 | 0.207131 | 0.542773 | 0.559154 | 0.524738 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
79 | 0.604583 | 1.588400 | 0.522964 | 13707.122123 | 4.687097 | 4.325848 | 5.230143 | 4.505300 | 0.271614 | 0.379977 | ... | 931.165777 | 954.003248 | 1.029516e+06 | 739.441554 | 1053.858769 | 96.443148 | 0.183835 | 0.526239 | 0.543494 | 0.484465 |
80 | 0.613529 | 1.589260 | 0.521511 | 14608.441198 | 4.746154 | 4.386391 | 5.296872 | 4.555200 | 0.276620 | 0.377185 | ... | 933.199076 | 954.786639 | 1.010457e+06 | 748.510799 | 1053.416242 | 93.550235 | 0.177092 | 0.517983 | 0.542767 | 0.493663 |
81 | 0.650858 | 1.448968 | 0.526573 | 613.466801 | 5.362943 | 4.963693 | 5.851936 | 5.273200 | 0.270652 | 0.376713 | ... | 931.741325 | 950.959938 | 1.042026e+06 | 746.521985 | 1063.578192 | 96.576782 | 0.181927 | 0.520435 | 0.539815 | 0.295455 |
82 | 0.661949 | 1.289697 | 0.537092 | 482.152209 | 5.432877 | 5.020905 | 5.953775 | 5.323950 | 0.269681 | 0.376620 | ... | 930.195692 | 944.551342 | 1.042298e+06 | 749.759529 | 1067.060620 | 95.413866 | 0.188724 | 0.522034 | 0.537868 | 0.282123 |
83 | 0.659533 | 1.348779 | 0.545570 | 364.087655 | 5.360663 | 5.005466 | 5.866987 | 5.209537 | 0.268028 | 0.376936 | ... | 932.941535 | 947.395483 | 1.047439e+06 | 755.562688 | 1065.594837 | 93.783535 | 0.191076 | 0.525830 | 0.541656 | 0.281388 |
84 rows × 68 columns
Cross-validation of the default regressor¶
Let's cross-validate the performance of our regressor using a Repeated K-fold strategy.
# Define a splitting strategy
outer_cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=2978)
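With 10 folds and 3 repeats, this yields 30 train/test splits; as a quick sanity check:
# 10 folds x 3 repeats = 30 splits
outer_cv.get_n_splits(train_x)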
We can now feed the model into the cross-validation loop:
cv_scores = cross_val_score(
init_pipeline_xgboost(),
X=train_x,
y=train_y,
cv=outer_cv,
scoring="neg_mean_absolute_error",
n_jobs=-1,
)
We quantify the regressor's performance with the mean absolute error (MAE). Since cross_val_score returns negated errors, we take the absolute value before summarizing.
cv_scores = np.absolute(cv_scores)
print('Mean MAE: %.3f (%.3f)' % (cv_scores.mean(), cv_scores.std()) )
Mean MAE: 0.410 (0.105)
A good mean absolute error (MAE) is relative to the dataset at hand. As such, we establish a baseline MAE for our dataset using naive predictive models that always predict the mean or the median of the target ratings.
cv_scores = cross_val_score(
init_pipeline_naive(strategy='mean'),
X=train_x,
y=train_y,
cv=outer_cv,
scoring="neg_mean_absolute_error",
n_jobs=-1,
)
cv_scores = np.absolute(cv_scores)
print('Mean baseline MAE: %.3f (%.3f)' % (cv_scores.mean(), cv_scores.std()) )
Mean baseline MAE: 0.741 (0.164)
cv_scores = cross_val_score(
init_pipeline_naive(strategy='median'),
X=train_x,
y=train_y,
cv=outer_cv,
scoring="neg_mean_absolute_error",
n_jobs=-1,
)
cv_scores = np.absolute(cv_scores)
print('Mean baseline MAE: %.3f (%.3f)' % (cv_scores.mean(), cv_scores.std()) )
Mean baseline MAE: 0.733 (0.165)
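Both naive baselines land around an MAE of 0.73-0.74, so the default XGBoost pipeline (MAE of 0.410) already reduces the error by roughly 45%, suggesting that the IQMs carry useful information about the manual ratings.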
Optimization of the hyperparameters¶
# Define the parameter grid for hyperparameter tuning
param_grid = {
'model__learning_rate': [0.1, 0.01, 0.001],
'model__max_depth': [3, 7],
'model__n_estimators': [50, 70, 90],
'model__eta': [0.1, 0.01],
'model__subsample': [0.7, 1.0],
}
# Initialize the XGBoost pipeline
pipeline = init_pipeline_xgboost()
# Initialize the GridSearchCV with the pipeline, parameter grid, and scoring metric
grid_search = GridSearchCV(
estimator=pipeline,
param_grid=param_grid,
scoring='neg_mean_absolute_error',
cv=outer_cv,
n_jobs=-1
)
grid_search.fit(train_x, y=train_y)
grid_search.best_params_
# best_params = grid_search.best_params_
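For reference, the grid above spans 3 × 2 × 3 × 2 × 2 = 72 hyperparameter combinations, which with the 30 repeated-CV splits amounts to 2,160 model fits. The count can be double-checked with scikit-learn's ParameterGrid:
from sklearn.model_selection import ParameterGrid
# 72 parameter combinations x 30 CV splits = 2160 fits
len(ParameterGrid(param_grid)) * outer_cv.get_n_splits()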
Cross-validation of the optimized regressor¶
# Define a splitting strategy
outer_cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=2978)
We can now feed the optimized model into the cross-validation loop:
# Values are hard-coded because the grid search was run on a cluster rather than locally in this notebook
best_params = {'eta': 0.1, 'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 90, 'subsample': 0.7}
cv_scores = cross_val_score(
estimator=init_pipeline_xgboost(**best_params),
X=train_x,
y=train_y,
cv=outer_cv,
scoring="neg_mean_absolute_error",
n_jobs=-1,
)
cv_scores = np.absolute(cv_scores)
print('Mean MAE: %.3f (%.3f)' % (cv_scores.mean(), cv_scores.std()) )
Mean MAE: 0.381 (0.098)
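With the tuned hyperparameters in hand, a natural next step is to refit the pipeline on all available data, predict quality ratings, and flag low-rated images for exclusion. The sketch below is illustrative only: the exclusion threshold of 2.0 and the output filename are assumptions, not part of the calibrated protocol.
import joblib
# Refit the tuned pipeline on the full dataset (illustrative sketch)
final_model = init_pipeline_xgboost(**best_params)
final_model.fit(train_x, train_y)
# Predicted ratings; images below an assumed cut-off of 2.0 are flagged
predicted = final_model.predict(train_x)
flagged = df.loc[predicted < 2.0, 'bids_name']
print(f'{len(flagged)} images flagged for exclusion')
# Persist the calibrated regressor for later use (hypothetical filename)
joblib.dump(final_model, 'hcph_t1w_qc_regressor.joblib')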