Run SMA data with SCrOFit
This notebook demosntrate how to use SCrOFit to run SMA data.
%autoreload
import anndata as ad
import os
import pandas as pd
import sys
sys.path.append('../../')
from scrofit.scrofit import SCrOFit
df = pd.read_csv('../../../data/MSA_data/corHstr_onlyCdSpots.csv', index_col=0)
SMA_cor_df = df[(df['FDR'] < 0.05) & (df['rho'].abs() > 0.2)]
def run_align(in_dir, sample):
st_adata = ad.read_h5ad(f'{in_dir}/{sample}_st_raw.adata')
sm_adata = ad.read_h5ad(f'{in_dir}/{sample}_sm_raw.adata')
adata_dict = {'ST': st_adata, 'SM': sm_adata}
out_dir = os.path.join(in_dir, 'scrofit_out')
obj = SCrOFit(adata_dict=adata_dict, out_dir=out_dir, sample=sample)
obj.align_slides()
print('---start mapping---')
obj.mapping(mapping_method='MCMF', ccs_type='spatial_rir',
alpha=1, beta=1, n_neighbors=3,
n_thread=6, n_batch=250,
verbose=False)
obj.check_slides()
headers = ['RegionLoupe', 'MSN.D2.PenkPos_2', 'MSN.D1.TacNeg_3', 'MSN.D2.PenkNeg_5',
'MSN.D1.TacPosB_6', 'MSN.D1.TacPosA_9', 'MSN.D2_C_18'] + SMA_cor_df['gene'].tolist()
obj.transfer_anno(headers)
for key, adata in adata_dict.items():
fn = '{}/{}_{}_pp.adata'.format(in_dir, sample, key.lower())
adata.write_h5ad(fn)
return obj, adata_dict
obj_dict = {}
for sample in ['V11T17-102']:
for block in ['A1', 'B1', 'C1', 'D1']:
print(block)
in_dir = f'../../../data/MSA_data/{sample}'
obj, adata_dict = run_align(in_dir, sample + '_' + block)
obj_dict[block] = obj
%autoreload
import anndata as ad
import os
import numpy as np
from scrofit.mapping import init_mdata
def merge_celltype(st_adata):
cell_types = ["Astro_0","Oligo_1","MSN.D2.PenkPos_2","MSN.D1.TacNeg_3","Oligo_4","MSN.D2.PenkNeg_5",
"MSN.D1.TacPosB_6","OPC_7","Mi.MiR.Ma_8","MSN.D1.TacPosA_9","Astro_10","Astro_11","Inhib_MSN_12",
"Inhib_13","unk_14","unk_15","unk_16","Astro_17","MSN.D2_C_18","unk_19","OPC_20","Oligo_21","unk_22"]
X = st_adata.obs[cell_types].to_numpy()
st_adata.obs['Cell Type'] = np.array(cell_types)[X.argmax(axis=1)]
st_adata.obs['Cell Type'] = st_adata.obs['Cell Type'].apply(lambda x: '_'.join(x.split('_')[:-1]))
st_adata.obs['Cell Type'] = st_adata.obs['Cell Type'].apply(lambda x: np.nan if x == 'unk' else x)
mymap = {
'MSN.D2.PenkNeg': 'MSN',
'MSN.D1.TacPosB': 'MSN',
'MSN.D1.TacPosA': 'MSN',
'MSN.D2.PenkPos': 'MSN',
'MSN.D1.TacNeg': 'MSN',
'MSN.D2_C': 'MSN',
}
st_adata.obs['Cell_Type'] =st_adata.obs['Cell Type'].apply(lambda x: mymap.get(x, x))
def replace_y_coord(in_dir, sample, block, i):
st_adata = ad.read_h5ad(f'{in_dir}/{sample}_{block}_st_pp.adata')
sm_adata = ad.read_h5ad(f'{in_dir}/{sample}_{block}_sm_pp.adata')
for key in ['spatial_rir', 'spatial_rir_filtered', 'spatial_MCMF_map', 'spatial']:
if key in st_adata.obsm:
st_adata.obsm[key][:, 1] += 45000*i
print(sample, key, 'ST', st_adata.obsm[key][:, 1].min(), st_adata.obsm[key][:, 1].max())
if key in sm_adata.obsm:
if key == 'spatial':
sm_adata.obsm[key][:, 1] += 70*i
else:
sm_adata.obsm[key][:, 1] += 45000*i
print(sample, key, 'ST', sm_adata.obsm[key][:, 1].min(), sm_adata.obsm[key][:, 1].max())
return st_adata, sm_adata
st_adata_list, sm_adata_list = [], []
mdata_list = []
for sample in ['V11T17-102']:
image_list = []
for i, block in enumerate(['A1', 'B1', 'C1', 'D1']):
in_dir = f'../../../data/MSA_data/{sample}'
st_adata, sm_adata = replace_y_coord(in_dir, sample, block, i)
merge_celltype(st_adata)
st_adata_list.append(st_adata)
sm_adata_list.append(sm_adata)
image = st_adata.uns['spatial'][f'{sample}_{block}']['images']['hires']
scalefactors = st_adata.uns['spatial'][f'{sample}_{block}']['scalefactors']
image_list.append(image)
#for col in sm_adata.obs.columns:
# if col.startswith('ST'):
# del sm_adata.obs[col]
mdata = init_mdata(sm_adata, st_adata)
mdata_list.append(mdata)
st_adata = ad.concat(st_adata_list)
print(st_adata_list[0].uns['spatial'][f'{sample}_A1']['scalefactors'])
print(st_adata_list[1].uns['spatial'][f'{sample}_B1']['scalefactors'])
print(st_adata_list[2].uns['spatial'][f'{sample}_C1']['scalefactors'])
print(st_adata_list[3].uns['spatial'][f'{sample}_D1']['scalefactors'])
st_adata.uns['spatial'] = {sample:{
'images': {'hires': np.concatenate(image_list)},
'scalefactors': scalefactors
}}
sm_adata = ad.concat(sm_adata_list)
sm_adata.uns['spatial'] = {sample:{'images': {'hires': np.concatenate(image_list)}}}
mdata = ad.concat(mdata_list)
st_adata.write_h5ad(f'{in_dir}/{sample}_st_pp.adata')
sm_adata.write_h5ad(f'{in_dir}/{sample}_sm_pp.adata')
mdata
%autoreload
from scrofit.embedding import embedding
embedding(mdata)
mdata.write_h5ad(f'{in_dir}/{sample}_m_pp.adata')