Analyze SMA data after SCrOFit
This notebook demosntrate how to analyze the SMA data after running SCrOFit.
%load_ext autoreload
%autoreload
import anndata as ad
import scanpy as sc
import matplotlib.pyplot as plt
import squidpy as sq
import numpy as np
import sys
sys.path.append('../../')
import scrofit.plotting as pl
sample = 'V11T17-102'
in_dir = f'../../../data/MSA_data/{sample}'
st_adata = ad.read_h5ad(f'{in_dir}/{sample}_st_pp.adata')
sm_adata = ad.read_h5ad(f'{in_dir}/{sample}_sm_pp.adata')
mdata = ad.read_h5ad(f'{in_dir}/{sample}_m_pp.adata')
sm_adata.obsm['SMA\nSM OCS'] = sm_adata.obsm['spatial']
st_adata.obsm['SMA\nST OCS'] = st_adata.obsm['spatial']
sc.pl.spatial(st_adata, basis='SMA\nST OCS')
mymap = {'unk': np.nan, 'unk2': np.nan}
st_adata.obs['Region'] = st_adata.obs['RegionLoupe'].apply(lambda x: mymap.get(x, x))
pl.ocs_spatial_scatter(st_adata, 'ST', color=['Region'], img=True, shape=None, library_id=sample,
size=0.5,
out_fn='ocs_region.pdf')
mdata.obs['ST_Region'] = mdata.obs['ST_RegionLoupe'].apply(lambda x: mymap.get(x, x))
pl.ccs_spatial_scatter(mdata, 'ST', color=['ST_Region'], img=True, shape=None, library_id=sample,
size=0.5,
out_fn='ccs_region.pdf')
pl.embed(mdata, color='ST_Region', figsize=(2.8, 3),
out_fn='embed_reigon.pdf')
pl.ocs_spatial_scatter(st_adata, 'ST', color=['dopamine'], img=True, shape=None, library_id=sample,
size=0.5, palette='Set2',
out_fn='ocs_dopamine.pdf')
pl.ccs_spatial_scatter(mdata, 'ST', color=['ST_dopamine'], img=True, shape=None, library_id=sample,
size=0.5, palette='Set2',
out_fn='ccs_dopamine.pdf')
sm_adata
%autoreload
for color in st_adata.obs.columns:
if not color.startswith('MSN'):
continue
pl.ocs_spatial_scatter(st_adata, 'ST', color=color, img=True, shape=None,
library_id=sample, cmap='OrRd', size=0.5,
out_fn=f'ocs_{color}.pdf')
%autoreload
for gene in ['SCG2', 'CHN1', 'NNAT', 'GFAP', 'MBP', 'TF', 'PLP1']:
pl.ocs_spatial_scatter(st_adata, 'ST', color=gene, img=True, shape=None,
library_id=sample, cmap='Spectral_r', size=0.5,
out_fn=f'ocs_{gene}.pdf')
%autoreload
pl.ocs_spatial_scatter(sm_adata, 'SM', color='mz_674.2805', img=False, shape=None, library_id=sample,
cmap='viridis', size=0.5,
out_fn='ocs_mz_674.2805.pdf')
pl.ccs_spatial_scatter(mdata, 'SM', color='mz_674.2805', img=False, shape=None, library_id=sample,
cmap='plasma', size=0.5,
out_fn='ccs_mz_674.2805.pdf')
pl.embed(mdata, color='mz_674.2805', cmap='plasma', figsize=(3.2, 3), colorbar_loc='right',
out_fn='embed_mz_674.2805.pdf')
%autoreload
pl.ocs_spatial_scatter(st_adata, 'ST', color=['Cell_Type'], img=True, shape=None, library_id=sample,
palette='Accent', size=0.5,
out_fn='ocs_cell_type.pdf')
pl.ccs_spatial_scatter(mdata, 'ST', color=['ST_Cell_Type'], img=True, shape=None, library_id=sample,
palette='Accent', size=0.5,
out_fn='ccs_cell_type.pdf')
pl.embed(mdata, color='ST_Cell_Type', palette='Accent', figsize=(2.8, 3),
out_fn='embed_cell_type.pdf')
st_adata.obsm
mdata.obsm
%autoreload
x_min=35000
x_max=x_min+5000
y_min=35000
y_max=y_min+5000
pl.ccs_spatial_zoom(
st_adata, mdata,
metabolite="mz_674.2805",
cell_type_col="Cell_Type",
st_coord_key="spatial_rir",
sm_coord_key="SM_spatial_rir",
cell_dot_size=250,
x_min=x_min, x_max=x_max,
y_min=y_min, y_max=y_max,
figsize=(4,4),
st_cmap="Accent",
m_cmap='plasma',
out_fn='ccs_zoom_cell_type.pdf'
)
%autoreload
pl.ccs_spatial_zoom(
st_adata, mdata,
metabolite="mz_674.2805",
cell_type_col="Region",
st_coord_key="spatial_rir",
sm_coord_key="SM_spatial_rir",
cell_dot_size=250,
gene_dot_size=10,
x_min=x_min, x_max=x_max,
y_min=y_min, y_max=y_max,
figsize=(4,4),
st_cmap=None,
m_cmap='plasma',
out_fn='ccs_zoom_region.pdf'
)
%autoreload
import seaborn as sns
import pandas as pd
dopamine = 'Dopamine (mz_674.2805)'
mymap = {'unk': np.nan, 'unk2': np.nan}
sm_adata.obs['Region'] = sm_adata.obs['ST_RegionLoupe'].apply(lambda x: mymap.get(x, x))
mymap = st_adata.obs['Cell_Type'].to_dict()
sm_adata.obs['Cell_Type'] = sm_adata.obs['ST_ST_id'].apply(lambda x: mymap[x])
sm_adata.obs['New_Cell_Type'] = sm_adata.obs['Cell_Type'].apply(lambda x: np.nan if x in ['Inhib', 'Inhib_MSN'] else x)
custom_order = ['MSN', 'Mi.MiR.Ma', 'Astro', 'OPC', 'Oligo']
sm_adata.obs['New_Cell_Type'] = sm_adata.obs['New_Cell_Type'].astype(pd.CategoricalDtype(categories=custom_order, ordered=True))
sm_adata.obs[dopamine] = dict(zip(sm_adata.obs.index, sm_adata[:, 'mz_674.2805'].X.reshape(-1)))
compare_type = 'New_Cell_Type'
comparisons = [
("MSN", "Astro"),
("MSN", "Mi.MiR.Ma"),
("MSN", "OPC"),
("MSN", "Oligo"),
]
base_colors = sns.color_palette('Accent', 8).as_hex()
palette = dict(zip(np.sort(sm_adata.obs['Cell_Type'].dropna().unique()), base_colors[:3] + base_colors[-4:]))
pl.violin(sm_adata, compare_type, dopamine, comparisons,
fig_size=(3, 3),
palette=palette,
text_x_angle=20,
out_fn='violin_cell_type.pdf')
%autoreload
compare_type = 'Region'
comparisons = [
#("ACB", "CI"),
#("ACB", "Cd"),
("Cd", "CI"),
]
pl.violin(sm_adata[sm_adata.obs['Region'] != 'ACB'], compare_type, dopamine, comparisons,
fig_size=(3, 3),
text_x_angle=90,
palette=sc.pl.palettes.default_20[1:],
out_fn='violin_region.pdf')
import pandas as pd
df = pd.read_csv('../../../data/MSA_data/corHstr_onlyCdSpots.csv', index_col=0)
SMA_cor_df = df[(df['FDR'] < 0.05) & (df['rho'].abs() > 0.2)]
SMA_cor_df['Region'] = 'Cd'
SMA_cor_df['Method'] = 'SMA'
SMA_cor_df
from scipy.stats import pearsonr
data_list = []
for region in sm_adata.obs['Region'].unique():
df = sm_adata[sm_adata.obs['Region'] == region].obs
if df.shape[0] == 0:
continue
for gene in SMA_cor_df['gene'].tolist():
if gene not in df.columns:
continue
corr, p_value = pearsonr(df[dopamine], df[gene])
data_list.append((region, 'Dopamine', gene, corr, p_value))
df = pd.DataFrame(data_list, columns=['Region', 'dopamine', 'gene', 'rho', 'Pvalue'])
df['Method'] = 'SCRoFit'
bench_cor_df = pd.concat([SMA_cor_df[df.columns], df])
bench_cor_df['Type'] = bench_cor_df['Region'] + '(' + bench_cor_df['Method'] + ')'
bench_cor_df['InvLog10Pvalue'] = -np.log10(bench_cor_df['Pvalue'])
bench_cor_df = bench_cor_df[bench_cor_df['Pvalue'] < 0.05]
from matplotlib.colors import Normalize
# Normalize the color values to set the midpoint to zero
z = bench_cor_df['rho']
norm = Normalize(vmin=-np.max(np.abs(z)), vmax=np.max(np.abs(z)))
fig = plt.figure(figsize=(7, 1.8))
sns.scatterplot(
data=bench_cor_df.sort_values(by='rho'),
y="Type", x="gene",
#order=['Cd(SCRoFit)', 'Cd(SMA)', 'CI(SCRoFit)', 'ACB', 'SCRoFit'],
hue='rho', hue_norm=norm,
size='InvLog10Pvalue',
palette='coolwarm'
)
plt.xlabel(None)
plt.xticks(rotation=45, ha='right')
plt.ylabel(None)
plt.legend().set_visible(False) # Hide the legend
plt.tight_layout()
plt.show()
from scipy.stats import pearsonr
data_list = []
for cell_type in sm_adata.obs['Cell_Type'].unique():
df = sm_adata[sm_adata.obs['Cell_Type'] == cell_type].obs
if df.shape[0] == 0:
continue
for gene in SMA_cor_df['gene'].tolist():
if gene not in df.columns:
continue
corr, p_value = pearsonr(df[dopamine], df[gene])
data_list.append((cell_type, 'Dopamine', gene, corr, p_value))
cor_df = pd.DataFrame(data_list, columns=['Cell Type', 'dopamine', 'gene', 'rho', 'Pvalue'])
cor_df['Method'] = 'SCRoFit'
cor_df = cor_df[cor_df['Pvalue'] < 0.05]
cor_df['InvLog10Pvalue'] = -np.log10(cor_df['Pvalue'])
fig = plt.figure(figsize=(7,3))
z = cor_df['rho']
norm = Normalize(vmin=-np.max(np.abs(z)), vmax=np.max(np.abs(z)))
sns.scatterplot(
data=cor_df.sort_values(by='rho'),
y="Cell Type", x="gene",
#order=['Cd(SCRoFit)', 'Cd(SMA)', 'CI(SCRoFit)', 'ACB', 'SCRoFit'],
hue='rho', hue_norm=norm,
size='InvLog10Pvalue',
palette='coolwarm'
)
plt.xlabel(None)
plt.ylabel(None)
plt.xticks(rotation=45, ha='right')
plt.legend().set_visible(False) # Hide the legend
plt.tight_layout()
plt.show()
bench_cor_df['Annotation'] = bench_cor_df['Region']
bench_cor_df['Function Annotation'] = 'Region'
cor_df['Annotation'] = cor_df['Cell Type']
cor_df['Function Annotation'] = 'Cell Type'
cols = ['Function Annotation', 'Annotation', 'dopamine', 'gene', 'rho', 'Pvalue', 'Method',
'InvLog10Pvalue']
df = pd.concat([bench_cor_df[cols], cor_df[cols]])
df = df.sort_values(by=['Function Annotation', 'Annotation', 'rho'], ascending=[False, True, True])
z = df['rho']
norm = Normalize(vmin=-np.max(np.abs(z)), vmax=np.max(np.abs(z)))
fig = plt.figure(figsize=(4,5))
sns.scatterplot(
data=df[df['Method'] == 'SCRoFit'],
x="Annotation", y="gene",
#order=['Cd(SCRoFit)', 'Cd(SMA)', 'CI(SCRoFit)', 'ACB', 'SCRoFit'],
hue='rho', hue_norm=norm,
size='InvLog10Pvalue',
palette='coolwarm'
)
plt.xlabel(None)
plt.ylabel(None)
plt.xticks(rotation=60, ha='right')
#plt.legend().set_visible(False) # Hide the legend
plt.legend(loc="upper left", bbox_to_anchor=(1, 1))
#plt.tight_layout()
plt.show()
import pandas as pd
import numpy as np
from plotnine import *
df = df.sort_values(
by=['Function Annotation', 'Annotation', 'rho'],
ascending=[False, True, True]
).copy()
df['gene'] = pd.Categorical(df['gene'], categories=df['gene'].unique(), ordered=True)
df = df[df['Annotation'].isin(['CI', 'Cd', 'MSN', 'Astro', 'OPC', 'Oligo'])]
df['Annotation'] = pd.Categorical(df['Annotation'],categories=['CI', 'Cd', 'MSN', 'Astro', 'OPC', 'Oligo'],ordered=True)
max_abs = df['rho'].abs().max()
p = (
ggplot(df[df['Method'] == 'SCRoFit'],
aes(y='Annotation', x='gene',
color='rho',
size='InvLog10Pvalue'))
+ facet_grid('Function Annotation ~ .',
scales='free', space='free')
+ geom_point()
+ scale_color_gradient2(
low="blue", mid="white", high="red",
midpoint=0,
limits=(-max_abs, max_abs)
)
+ scale_size_area(max_size=5)
+ theme_bw()
+ theme(
figure_size=(5, 4),
axis_text_y=element_text(angle=45, hjust=1),
axis_text_x=element_text(angle=90, hjust=-1),
strip_background=element_blank(),
)
+ labs(
color='rho',
size='-log10(p)',
x='',
y=''
)
)
p.save('spatial_cor.pdf')
p
st_adata[st_adata.obs['Region'] == 'ACB'].obsm['spatial_rir'].copy().min(axis=0), \
st_adata[st_adata.obs['Region'] == 'ACB'].obsm['spatial_rir'].copy().max(axis=0)
%autoreload
x_min = 17000
x_max = x_min + 5000
y_min = 120000
y_max = y_min + 5000
# Usage example
pl.ccs_spatial_zoom(
st_adata, mdata,
metabolite="mz_674.2805",
gene="GFAP",
cell_type_col="Region",
st_coord_key="spatial_rir",
sm_coord_key="SM_spatial_rir",
cell_dot_size=250,
gene_dot_size=10,
x_min=x_min, x_max=x_max,
y_min=y_min, y_max=y_max,
figsize=(4, 4),
st_cmap=None,
g_cmap='Spectral_r',
m_cmap='plasma',
out_fn='ccs_zoom_GFAP_region.pdf'
)
%autoreload
pl.plot_gene_cell_type(st_adata, mdata, 'GFAP', 'Region', 'ACB')
print(st_adata[st_adata.obs['Region'] == 'CI'].obsm['spatial_rir'].copy().min(axis=0), \
st_adata[st_adata.obs['Region'] == 'CI'].obsm['spatial_rir'].copy().max(axis=0))
x_min = 30120
x_max = x_min + 5000
y_min = 80000
y_max = y_min + 5000
pl.ccs_spatial_zoom(
st_adata, mdata,
metabolite="mz_674.2805",
gene="TF",
cell_type_col="Region",
st_coord_key="spatial_rir",
sm_coord_key="SM_spatial_rir",
cell_dot_size=250,
gene_dot_size=10,
x_min=x_min, x_max=x_max,
y_min=y_min, y_max=y_max,
figsize=(4, 4),
st_cmap=None,
g_cmap='Spectral_r',
m_cmap='plasma',
out_fn='ccs_zoom_CI_TF_mz_674.2805.pdf'
)
%autoreload
pl.plot_gene_cell_type(st_adata, mdata, 'TF', 'Region', 'CI')
%autoreload
pl.plot_gene_cell_type(st_adata, mdata, 'NCDN', 'Region', 'Cd')
%autoreload
pl.plot_gene_cell_type(st_adata, mdata, 'SCG2', 'Region', 'Cd')
%autoreload
print(st_adata[st_adata.obs['Cell_Type'] == 'Astro'].obsm['spatial_rir'].copy().min(axis=0), \
st_adata[st_adata.obs['Cell_Type'] == 'Astro'].obsm['spatial_rir'].copy().max(axis=0))
x_min = 15000
x_max = x_min + 5000
y_min = 150000
y_max = y_min + 5000
# Usage example
pl.ccs_spatial_zoom(
st_adata, mdata,
metabolite="mz_674.2805",
gene="MBP",
cell_type_col="Cell_Type",
st_coord_key="spatial_rir",
sm_coord_key="SM_spatial_rir",
cell_dot_size=250,
gene_dot_size=10,
x_min=x_min, x_max=x_max,
y_min=y_min, y_max=y_max,
figsize=(4, 4),
st_cmap='Accent',
g_cmap='Spectral_r',
m_cmap='plasma',
out_fn='ccs_zoom_Astro_MBP_mz_674.2805.pdf'
)
%autoreload
pl.plot_gene_cell_type(st_adata, mdata, 'MBP', 'Cell_Type', 'Astro')
%autoreload
pl.plot_gene_cell_type(st_adata, mdata, 'SCG2', 'Cell_Type', 'MSN')
%autoreload
print(st_adata[st_adata.obs['Cell_Type'] == 'MSN'].obsm['spatial_rir'].copy().min(axis=0), \
st_adata[st_adata.obs['Cell_Type'] == 'MSN'].obsm['spatial_rir'].copy().max(axis=0))
x_min = 25120
x_max = x_min + 5000
y_min = 100000
y_max = y_min + 5000
# Usage example
pl.ccs_spatial_zoom(
st_adata, mdata,
metabolite="mz_674.2805",
gene="NCDN",
cell_type_col="Cell_Type",
st_coord_key="spatial_rir",
sm_coord_key="SM_spatial_rir",
cell_dot_size=250,
gene_dot_size=10,
x_min=x_min, x_max=x_max,
y_min=y_min, y_max=y_max,
figsize=(4, 4),
st_cmap='Accent',
g_cmap='Spectral_r',
m_cmap='plasma',
out_fn='ccs_zoom_MSN_NCDN_mz_674.2805.pdf'
)
%autoreload
pl.plot_gene_cell_type(st_adata, mdata, 'NCDN', 'Cell_Type', 'MSN')
%autoreload
pl.plot_gene_cell_type(st_adata, mdata, 'PLP1', 'Cell_Type', 'Oligo')
bench_cor_df[bench_cor_df['gene'].isin(['GFAP', 'TF', 'NCDN', 'SCG2', 'MBP', 'PLP1'])]