Tutorials_scCAMEL-SWAPLINEv1_LiverMacrophage

Original Article: Human resident liver myeloid cells protect against metabolic stress in obesity,”Nature Metabolism.”, 2023

Package: scCAMEL-SWAPLINE.v1

[11]:
import datetime
today=f"{datetime.datetime.now():%Y-%m-%d}"
today
[11]:
'2023-07-07'
[2]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.utils.data as Data
import torchvision
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import torch.utils.data as data_utils
from matplotlib import cm
import numpy as np
import pandas as pd
import pickle as pickle
from scipy.spatial.distance import cdist, pdist, squareform
import pandas as pd
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
from sklearn.model_selection import StratifiedShuffleSplit
from collections import defaultdict
from sklearn import preprocessing
import matplotlib.patches as mpatches
import torch.nn.functional as F
import math
#import gpytorch

import urllib.request
import os.path
from scipy.io import loadmat
from math import floor
import anndata
# Make plots inline
%pylab inline

%pylab is deprecated, use %matplotlib inline and import the required libraries.
Populating the interactive namespace from numpy and matplotlib
C:\Users\huyiz\AppData\Local\Continuum\anaconda3\envs\py39\lib\site-packages\IPython\core\magics\pylab.py:162: UserWarning: pylab import has clobbered these variables: ['floor']
`%matplotlib` prevents importing * from pylab and numpy
  warn("pylab import has clobbered these variables: %s"  % clobbered +
[3]:
torch.manual_seed(1)    # reproducible
[3]:
<torch._C.Generator at 0x24082a98eb0>
[4]:
import scCAMEL as scm
from scCAMEL import CamelPrefiltering
from scCAMEL import CamelSwapline
from scCAMEL import CamelEvo
C:\Users\huyiz\AppData\Local\Continuum\anaconda3\envs\py39\lib\site-packages\scCAMEL\CamelSwapline.py:534: FutureWarning: The default dtype for empty Series will be 'object' instead of 'float64' in a future version. Specify a dtype explicitly to silence this warning.
  def addcolor(datax,clustername="Cluster", colorcode="color", predef=pd.Series()):
[26]:
screfall=anndata.read("LiverMacrophage_474cells_Ref2023-01-16_MergeCluster_35epch.h5ad")
screfall
[26]:
AnnData object with n_obs × n_vars = 474 × 21397
    obs: 'cellID', 'Cluster'
[28]:
set(screfall.obs["Cluster"])
[28]:
{'LM1', 'LM2-C1', 'LM2-C2', 'LM3', 'LM4'}
[29]:
scref=screfall
[30]:
set(scref.obs["Cluster"])
[30]:
{'LM1', 'LM2-C1', 'LM2-C2', 'LM3', 'LM4'}
[31]:
scref.obs.groupby(["Cluster"]).count()
[31]:
cellID
Cluster
LM1 133
LM2-C1 39
LM2-C2 73
LM3 96
LM4 133
[35]:
scref.X=scref.X.todense()
[36]:
path='/Dropbox/data/proj/PE_HYZ/PublicDataSet/'
filename='PANTHER_cell_cycle_genes.txt'
#dfpfc2= prefilteringTest.prefilter(df_f=dfpfc,filename=filename, path=path)
#scref= scm.CamelPrefiltering.prefilter(datax=scref,filename=filename, path=path)
[37]:
scref=scm.CamelPrefiltering.DataScaling(scref)
[38]:
scref.var['Filter1']=[True]*scref.var.shape[0]
scref
[38]:
AnnData object with n_obs × n_vars = 474 × 21397
    obs: 'cellID', 'Cluster'
    var: 'Filter1'
[43]:
scref=CamelPrefiltering.SelectFeatures(datax=scref, clustername='Cluster',methodname='wilcoxon', numbergenes=1000, folderchange=1.5)
C:\Users\huyiz\AppData\Local\Continuum\anaconda3\envs\py39\lib\site-packages\scanpy\tools\_rank_genes_groups.py:417: RuntimeWarning: overflow encountered in expm1
  foldchanges = (self.expm1_func(mean_group) + 1e-9) / (
C:\Users\huyiz\AppData\Local\Continuum\anaconda3\envs\py39\lib\site-packages\scanpy\tools\_rank_genes_groups.py:418: RuntimeWarning: overflow encountered in expm1
  self.expm1_func(mean_rest) + 1e-9
C:\Users\huyiz\AppData\Local\Continuum\anaconda3\envs\py39\lib\site-packages\scanpy\tools\_rank_genes_groups.py:417: RuntimeWarning: invalid value encountered in true_divide
  foldchanges = (self.expm1_func(mean_group) + 1e-9) / (
C:\Users\huyiz\AppData\Local\Continuum\anaconda3\envs\py39\lib\site-packages\scanpy\tools\_rank_genes_groups.py:420: RuntimeWarning: divide by zero encountered in log2
  self.stats[group_name, 'logfoldchanges'] = np.log2(
C:\Users\huyiz\AppData\Local\Continuum\anaconda3\envs\py39\lib\site-packages\scanpy\tools\_rank_genes_groups.py:417: RuntimeWarning: overflow encountered in expm1
  foldchanges = (self.expm1_func(mean_group) + 1e-9) / (
C:\Users\huyiz\AppData\Local\Continuum\anaconda3\envs\py39\lib\site-packages\scanpy\tools\_rank_genes_groups.py:418: RuntimeWarning: overflow encountered in expm1
  self.expm1_func(mean_rest) + 1e-9
C:\Users\huyiz\AppData\Local\Continuum\anaconda3\envs\py39\lib\site-packages\scanpy\tools\_rank_genes_groups.py:417: RuntimeWarning: invalid value encountered in true_divide
  foldchanges = (self.expm1_func(mean_group) + 1e-9) / (
C:\Users\huyiz\AppData\Local\Continuum\anaconda3\envs\py39\lib\site-packages\scanpy\tools\_rank_genes_groups.py:420: RuntimeWarning: divide by zero encountered in log2
  self.stats[group_name, 'logfoldchanges'] = np.log2(
C:\Users\huyiz\AppData\Local\Continuum\anaconda3\envs\py39\lib\site-packages\scanpy\tools\_rank_genes_groups.py:417: RuntimeWarning: overflow encountered in expm1
  foldchanges = (self.expm1_func(mean_group) + 1e-9) / (
C:\Users\huyiz\AppData\Local\Continuum\anaconda3\envs\py39\lib\site-packages\scanpy\tools\_rank_genes_groups.py:418: RuntimeWarning: overflow encountered in expm1
  self.expm1_func(mean_rest) + 1e-9
C:\Users\huyiz\AppData\Local\Continuum\anaconda3\envs\py39\lib\site-packages\scanpy\tools\_rank_genes_groups.py:417: RuntimeWarning: invalid value encountered in true_divide
  foldchanges = (self.expm1_func(mean_group) + 1e-9) / (
C:\Users\huyiz\AppData\Local\Continuum\anaconda3\envs\py39\lib\site-packages\scanpy\tools\_rank_genes_groups.py:420: RuntimeWarning: divide by zero encountered in log2
  self.stats[group_name, 'logfoldchanges'] = np.log2(
C:\Users\huyiz\AppData\Local\Continuum\anaconda3\envs\py39\lib\site-packages\scanpy\tools\_rank_genes_groups.py:417: RuntimeWarning: overflow encountered in expm1
  foldchanges = (self.expm1_func(mean_group) + 1e-9) / (
C:\Users\huyiz\AppData\Local\Continuum\anaconda3\envs\py39\lib\site-packages\scanpy\tools\_rank_genes_groups.py:418: RuntimeWarning: overflow encountered in expm1
  self.expm1_func(mean_rest) + 1e-9
C:\Users\huyiz\AppData\Local\Continuum\anaconda3\envs\py39\lib\site-packages\scanpy\tools\_rank_genes_groups.py:417: RuntimeWarning: invalid value encountered in true_divide
  foldchanges = (self.expm1_func(mean_group) + 1e-9) / (
C:\Users\huyiz\AppData\Local\Continuum\anaconda3\envs\py39\lib\site-packages\scanpy\tools\_rank_genes_groups.py:420: RuntimeWarning: divide by zero encountered in log2
  self.stats[group_name, 'logfoldchanges'] = np.log2(
C:\Users\huyiz\AppData\Local\Continuum\anaconda3\envs\py39\lib\site-packages\scanpy\tools\_rank_genes_groups.py:417: RuntimeWarning: overflow encountered in expm1
  foldchanges = (self.expm1_func(mean_group) + 1e-9) / (
C:\Users\huyiz\AppData\Local\Continuum\anaconda3\envs\py39\lib\site-packages\scanpy\tools\_rank_genes_groups.py:418: RuntimeWarning: overflow encountered in expm1
  self.expm1_func(mean_rest) + 1e-9
C:\Users\huyiz\AppData\Local\Continuum\anaconda3\envs\py39\lib\site-packages\scanpy\tools\_rank_genes_groups.py:417: RuntimeWarning: invalid value encountered in true_divide
  foldchanges = (self.expm1_func(mean_group) + 1e-9) / (
C:\Users\huyiz\AppData\Local\Continuum\anaconda3\envs\py39\lib\site-packages\scanpy\tools\_rank_genes_groups.py:420: RuntimeWarning: divide by zero encountered in log2
  self.stats[group_name, 'logfoldchanges'] = np.log2(
[45]:
########################################################
########################################################
#remeber to change the file path in tftable
########################################################
########################################################
scref =scm.CamelPrefiltering.LabelGene_Scaling(datax=scref,
                                                                TPTT=100000,     mprotogruop=scref.obs["Cluster"].values,commongene=None,
                                                                                              sharedMVgenes=None,std_scaling=True,
    tftable="(file path to tftable)/FantomTF2CLUSTER_human_official.txt", learninggroup="train")


CamelRunning---GenesScaling......
C:\Users\huyiz\AppData\Local\Continuum\anaconda3\envs\py39\lib\site-packages\scCAMEL\CamelPrefiltering.py:576: FutureWarning: In a future version of pandas all arguments of DataFrame.dropna will be keyword-only.
  scalepfc = dfpfc.div(dfpfc.std(1), axis=0).dropna(0)
C:\Users\huyiz\AppData\Local\Continuum\anaconda3\envs\py39\lib\site-packages\scCAMEL\CamelPrefiltering.py:577: FutureWarning: In a future version of pandas all arguments of DataFrame.dropna will be keyword-only.
  scalepfc = dfpfc.astype(float).dropna(0)
C:\Users\huyiz\AppData\Local\Continuum\anaconda3\envs\py39\lib\site-packages\scCAMEL\CamelPrefiltering.py:579: FutureWarning: In a future version of pandas all arguments of DataFrame.dropna will be keyword-only.
  scalepfc = scalepfc.dropna(0)
C:\Users\huyiz\AppData\Local\Continuum\anaconda3\envs\py39\lib\site-packages\scCAMEL\CamelPrefiltering.py:580: FutureWarning: Passing a set as an indexer is deprecated and will raise in a future version. Use a list instead.
  dfpfc_dev = scalepfc.loc[set(scalepfc.index) & set(sharedMVgenes)].dropna()
CamelRunning---TrainingGenesScaling......Finished
[46]:
scref
[46]:
AnnData object with n_obs × n_vars = 474 × 21397
    obs: 'cellID', 'Cluster', 'mtrain_index'
    var: 'Filter1', 'MVgene', 'RefGeneList'
    uns: 'rank_genes_groups', 'train_set_gene', 'mclasses_names'
    obsm: 'train_set_values'
[47]:
len(scref.var.index[scref.var["MVgene"]])
[47]:
3987
[48]:
net=scm.CamelPrefiltering.NNclassifer(
   datax=scref,
    epochNum=200,
    learningRate=0.03,
    verbose=0,
    optimizerMmentum=0.8,
    dropout=0.3,
    #imizer__nesterov=True,
    )
CamelRunning---NNclasffier_in_cuda.......
CamelRunning---NNclasffier_in_cuda.......Finished

#Accuracy plot, the overall clustering accuracy is ~85%

[52]:
ax=scm.CamelPrefiltering.AccuracyPlot( nnModel=net, accCutoff=0.85,
                 Xlow=-1, Ylow=0.0, Yhigh=1,
               )
plt.savefig("upload_%s_CurvePlot_learningAccuracy.pdf"%today,bbox_inches='tight')
../_images/Tutorials_scCAMEL_SWAPLINEv2_Tutorials_scCAMEL-SWAPLINEv1_LiverMacrophage_22_0.png
[53]:
net=scm.CamelPrefiltering.NNclassifer(
   datax=scref,
    epochNum=35,
    learningRate=0.03,
    verbose=0,
    optimizerMmentum=0.8,
    dropout=0.3,
    #imizer__nesterov=True,
    )
CamelRunning---NNclasffier_in_cuda.......
CamelRunning---NNclasffier_in_cuda.......Finished
[54]:
scref
[54]:
AnnData object with n_obs × n_vars = 474 × 21397
    obs: 'cellID', 'Cluster', 'mtrain_index'
    var: 'Filter1', 'MVgene', 'RefGeneList'
    uns: 'rank_genes_groups', 'train_set_gene', 'mclasses_names'
    obsm: 'train_set_values'
[56]:
#if color is not defined
scref=scm.CamelSwapline.addcolor(datax=scref,clustername="Cluster", colorcode="color")
[57]:
scref
[57]:
AnnData object with n_obs × n_vars = 474 × 21397
    obs: 'cellID', 'Cluster', 'mtrain_index', 'color'
    var: 'Filter1', 'MVgene', 'RefGeneList'
    uns: 'rank_genes_groups', 'train_set_gene', 'mclasses_names', 'refcolor_dict'
    obsm: 'train_set_values'
[58]:
set(scref.obs["Cluster"])
[58]:
{'LM1', 'LM2-C1', 'LM2-C2', 'LM3', 'LM4'}
[ ]:
LM1: #ABD9E9
LM2-C1 (use name LM2): #2C7BB6
LM2-C2 (use name cDC2): purple
LM3: #FDAE61
LM4: #D7191C

[59]:
clist=[]
for item in scref.obs["Cluster"]:
    if item=="LM1":
        clist.append("#ABD9E9")
    elif item=="LM2-C2":
        clist.append("#2C7BB6")
    elif item=="LM2-C1":
        clist.append("purple")
    elif item=="LM3":
        clist.append("#FDAE61")
    elif item=="LM4":
        clist.append("#D7191C")
[60]:
scref.obs["color"]=clist
[61]:
scref.uns['refcolor_dict']={'LM2-C1': [44, 123, 182],
 'LM2-C2': [143, 0, 255],
 "LM3": [253,174,97],
                           "LM4": [215,25,28],
 'LM1': [171, 217, 233],
                           }
[62]:
scref.uns["mwanted_order"] =list(sort(list(set(scref.obs["Cluster"]))))
[64]:
scref=scm.CamelSwapline.prediction(datax=scref, mcolor_dict=scref.uns["refcolor_dict"] ,net=net,learninggroup="train", radarplot=True,fontsizeValue=18,
                       ncolnm=3, bbValue=(1.2, 1.05)  )
plt.savefig("upload_%s_RadarPlot_Merged_cluster.pdf"%today,bbox_inches='tight')
../_images/Tutorials_scCAMEL_SWAPLINEv2_Tutorials_scCAMEL-SWAPLINEv1_LiverMacrophage_33_0.png
[65]:
scref
[65]:
AnnData object with n_obs × n_vars = 474 × 21397
    obs: 'cellID', 'Cluster', 'mtrain_index', 'color'
    var: 'Filter1', 'MVgene', 'RefGeneList'
    uns: 'rank_genes_groups', 'train_set_gene', 'mclasses_names', 'refcolor_dict', 'mwanted_order', 'Celltype_Score_RefCellType', 'Celltype_OrderNumber'
    obsm: 'train_set_values', 'Celltype_Score', 'CelltypeScoreCoordinates'
[70]:
#work_dir=""
#filename="%s_Ref%s_MergeCluster_35epch.h5ad"%("LiverMacrophageNNlearned",today)
[71]:
os.path.join(work_dir,filename)
[71]:
'/Dropbox/data/proj/PE_HYZ/PublicDataSet/Liver_Macrophage/LiverMacrophageNNlearned_Ref2022-12-21_MergeCluster_35epch.h5ad'
[72]:
scm.CamelSwapline.writedata(adatax=scref,filename=filename,filepath=work_dir)
[196]:
scref.obs.to_csv("Metatable.csv",sep=",")