Tutorials_scCAMEL-SWAPLINEv1_LiverMacrophage

Original Article: Human resident liver myeloid cells protect against metabolic stress in obesity,”Nature Metabolism.”, 2023

Package: scCAMEL-SWAPLINE.v1

[11]:

import datetime
today=f"{datetime.datetime.now():%Y-%m-%d}"
today

[11]:

'2023-07-07'

[2]:

import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.utils.data as Data
import torchvision
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import torch.utils.data as data_utils
from matplotlib import cm
import numpy as np
import pandas as pd
import pickle as pickle
from scipy.spatial.distance import cdist, pdist, squareform
import pandas as pd
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
from sklearn.model_selection import StratifiedShuffleSplit
from collections import defaultdict
from sklearn import preprocessing
import matplotlib.patches as mpatches
import torch.nn.functional as F
import math
#import gpytorch

import urllib.request
import os.path
from scipy.io import loadmat
from math import floor
import anndata
# Make plots inline
%pylab inline

%pylab is deprecated, use %matplotlib inline and import the required libraries.
Populating the interactive namespace from numpy and matplotlib

C:\Users\huyiz\AppData\Local\Continuum\anaconda3\envs\py39\lib\site-packages\IPython\core\magics\pylab.py:162: UserWarning: pylab import has clobbered these variables: ['floor']
`%matplotlib` prevents importing * from pylab and numpy
  warn("pylab import has clobbered these variables: %s"  % clobbered +

[3]:

torch.manual_seed(1)    # reproducible

[3]:

<torch._C.Generator at 0x24082a98eb0>

[4]:

import scCAMEL as scm
from scCAMEL import CamelPrefiltering
from scCAMEL import CamelSwapline
from scCAMEL import CamelEvo

C:\Users\huyiz\AppData\Local\Continuum\anaconda3\envs\py39\lib\site-packages\scCAMEL\CamelSwapline.py:534: FutureWarning: The default dtype for empty Series will be 'object' instead of 'float64' in a future version. Specify a dtype explicitly to silence this warning.
  def addcolor(datax,clustername="Cluster", colorcode="color", predef=pd.Series()):

[26]:

screfall=anndata.read("LiverMacrophage_474cells_Ref2023-01-16_MergeCluster_35epch.h5ad")
screfall

[26]:

AnnData object with n_obs × n_vars = 474 × 21397
    obs: 'cellID', 'Cluster'

[28]:

set(screfall.obs["Cluster"])

[28]:

{'LM1', 'LM2-C1', 'LM2-C2', 'LM3', 'LM4'}

[29]:

scref=screfall

[30]:

set(scref.obs["Cluster"])

[30]:

{'LM1', 'LM2-C1', 'LM2-C2', 'LM3', 'LM4'}

[31]:

scref.obs.groupby(["Cluster"]).count()

[31]:

	cellID
Cluster
LM1	133
LM2-C1	39
LM2-C2	73
LM3	96
LM4	133

[35]:

scref.X=scref.X.todense()

[36]:

path='/Dropbox/data/proj/PE_HYZ/PublicDataSet/'
filename='PANTHER_cell_cycle_genes.txt'
#dfpfc2= prefilteringTest.prefilter(df_f=dfpfc,filename=filename, path=path)
#scref= scm.CamelPrefiltering.prefilter(datax=scref,filename=filename, path=path)

[37]:

scref=scm.CamelPrefiltering.DataScaling(scref)

[38]:

scref.var['Filter1']=[True]*scref.var.shape[0]
scref

[38]:

AnnData object with n_obs × n_vars = 474 × 21397
    obs: 'cellID', 'Cluster'
    var: 'Filter1'

[43]:

scref=CamelPrefiltering.SelectFeatures(datax=scref, clustername='Cluster',methodname='wilcoxon', numbergenes=1000, folderchange=1.5)

C:\Users\huyiz\AppData\Local\Continuum\anaconda3\envs\py39\lib\site-packages\scanpy\tools\_rank_genes_groups.py:417: RuntimeWarning: overflow encountered in expm1
  foldchanges = (self.expm1_func(mean_group) + 1e-9) / (
C:\Users\huyiz\AppData\Local\Continuum\anaconda3\envs\py39\lib\site-packages\scanpy\tools\_rank_genes_groups.py:418: RuntimeWarning: overflow encountered in expm1
  self.expm1_func(mean_rest) + 1e-9
C:\Users\huyiz\AppData\Local\Continuum\anaconda3\envs\py39\lib\site-packages\scanpy\tools\_rank_genes_groups.py:417: RuntimeWarning: invalid value encountered in true_divide
  foldchanges = (self.expm1_func(mean_group) + 1e-9) / (
C:\Users\huyiz\AppData\Local\Continuum\anaconda3\envs\py39\lib\site-packages\scanpy\tools\_rank_genes_groups.py:420: RuntimeWarning: divide by zero encountered in log2
  self.stats[group_name, 'logfoldchanges'] = np.log2(
C:\Users\huyiz\AppData\Local\Continuum\anaconda3\envs\py39\lib\site-packages\scanpy\tools\_rank_genes_groups.py:417: RuntimeWarning: overflow encountered in expm1
  foldchanges = (self.expm1_func(mean_group) + 1e-9) / (
C:\Users\huyiz\AppData\Local\Continuum\anaconda3\envs\py39\lib\site-packages\scanpy\tools\_rank_genes_groups.py:418: RuntimeWarning: overflow encountered in expm1
  self.expm1_func(mean_rest) + 1e-9
C:\Users\huyiz\AppData\Local\Continuum\anaconda3\envs\py39\lib\site-packages\scanpy\tools\_rank_genes_groups.py:417: RuntimeWarning: invalid value encountered in true_divide
  foldchanges = (self.expm1_func(mean_group) + 1e-9) / (
C:\Users\huyiz\AppData\Local\Continuum\anaconda3\envs\py39\lib\site-packages\scanpy\tools\_rank_genes_groups.py:420: RuntimeWarning: divide by zero encountered in log2
  self.stats[group_name, 'logfoldchanges'] = np.log2(
C:\Users\huyiz\AppData\Local\Continuum\anaconda3\envs\py39\lib\site-packages\scanpy\tools\_rank_genes_groups.py:417: RuntimeWarning: overflow encountered in expm1
  foldchanges = (self.expm1_func(mean_group) + 1e-9) / (
C:\Users\huyiz\AppData\Local\Continuum\anaconda3\envs\py39\lib\site-packages\scanpy\tools\_rank_genes_groups.py:418: RuntimeWarning: overflow encountered in expm1
  self.expm1_func(mean_rest) + 1e-9
C:\Users\huyiz\AppData\Local\Continuum\anaconda3\envs\py39\lib\site-packages\scanpy\tools\_rank_genes_groups.py:417: RuntimeWarning: invalid value encountered in true_divide
  foldchanges = (self.expm1_func(mean_group) + 1e-9) / (
C:\Users\huyiz\AppData\Local\Continuum\anaconda3\envs\py39\lib\site-packages\scanpy\tools\_rank_genes_groups.py:420: RuntimeWarning: divide by zero encountered in log2
  self.stats[group_name, 'logfoldchanges'] = np.log2(
C:\Users\huyiz\AppData\Local\Continuum\anaconda3\envs\py39\lib\site-packages\scanpy\tools\_rank_genes_groups.py:417: RuntimeWarning: overflow encountered in expm1
  foldchanges = (self.expm1_func(mean_group) + 1e-9) / (
C:\Users\huyiz\AppData\Local\Continuum\anaconda3\envs\py39\lib\site-packages\scanpy\tools\_rank_genes_groups.py:418: RuntimeWarning: overflow encountered in expm1
  self.expm1_func(mean_rest) + 1e-9
C:\Users\huyiz\AppData\Local\Continuum\anaconda3\envs\py39\lib\site-packages\scanpy\tools\_rank_genes_groups.py:417: RuntimeWarning: invalid value encountered in true_divide
  foldchanges = (self.expm1_func(mean_group) + 1e-9) / (
C:\Users\huyiz\AppData\Local\Continuum\anaconda3\envs\py39\lib\site-packages\scanpy\tools\_rank_genes_groups.py:420: RuntimeWarning: divide by zero encountered in log2
  self.stats[group_name, 'logfoldchanges'] = np.log2(
C:\Users\huyiz\AppData\Local\Continuum\anaconda3\envs\py39\lib\site-packages\scanpy\tools\_rank_genes_groups.py:417: RuntimeWarning: overflow encountered in expm1
  foldchanges = (self.expm1_func(mean_group) + 1e-9) / (
C:\Users\huyiz\AppData\Local\Continuum\anaconda3\envs\py39\lib\site-packages\scanpy\tools\_rank_genes_groups.py:418: RuntimeWarning: overflow encountered in expm1
  self.expm1_func(mean_rest) + 1e-9
C:\Users\huyiz\AppData\Local\Continuum\anaconda3\envs\py39\lib\site-packages\scanpy\tools\_rank_genes_groups.py:417: RuntimeWarning: invalid value encountered in true_divide
  foldchanges = (self.expm1_func(mean_group) + 1e-9) / (
C:\Users\huyiz\AppData\Local\Continuum\anaconda3\envs\py39\lib\site-packages\scanpy\tools\_rank_genes_groups.py:420: RuntimeWarning: divide by zero encountered in log2
  self.stats[group_name, 'logfoldchanges'] = np.log2(

[45]:

########################################################
########################################################
#remeber to change the file path in tftable
########################################################
########################################################
scref =scm.CamelPrefiltering.LabelGene_Scaling(datax=scref,
                                                                TPTT=100000,     mprotogruop=scref.obs["Cluster"].values,commongene=None,
                                                                                              sharedMVgenes=None,std_scaling=True,
    tftable="(file path to tftable)/FantomTF2CLUSTER_human_official.txt", learninggroup="train")

CamelRunning---GenesScaling......

C:\Users\huyiz\AppData\Local\Continuum\anaconda3\envs\py39\lib\site-packages\scCAMEL\CamelPrefiltering.py:576: FutureWarning: In a future version of pandas all arguments of DataFrame.dropna will be keyword-only.
  scalepfc = dfpfc.div(dfpfc.std(1), axis=0).dropna(0)
C:\Users\huyiz\AppData\Local\Continuum\anaconda3\envs\py39\lib\site-packages\scCAMEL\CamelPrefiltering.py:577: FutureWarning: In a future version of pandas all arguments of DataFrame.dropna will be keyword-only.
  scalepfc = dfpfc.astype(float).dropna(0)
C:\Users\huyiz\AppData\Local\Continuum\anaconda3\envs\py39\lib\site-packages\scCAMEL\CamelPrefiltering.py:579: FutureWarning: In a future version of pandas all arguments of DataFrame.dropna will be keyword-only.
  scalepfc = scalepfc.dropna(0)
C:\Users\huyiz\AppData\Local\Continuum\anaconda3\envs\py39\lib\site-packages\scCAMEL\CamelPrefiltering.py:580: FutureWarning: Passing a set as an indexer is deprecated and will raise in a future version. Use a list instead.
  dfpfc_dev = scalepfc.loc[set(scalepfc.index) & set(sharedMVgenes)].dropna()

CamelRunning---TrainingGenesScaling......Finished

[46]:

scref

[46]:

AnnData object with n_obs × n_vars = 474 × 21397
    obs: 'cellID', 'Cluster', 'mtrain_index'
    var: 'Filter1', 'MVgene', 'RefGeneList'
    uns: 'rank_genes_groups', 'train_set_gene', 'mclasses_names'
    obsm: 'train_set_values'

[47]:

len(scref.var.index[scref.var["MVgene"]])

[47]:

[48]:

net=scm.CamelPrefiltering.NNclassifer(
   datax=scref,
    epochNum=200,
    learningRate=0.03,
    verbose=0,
    optimizerMmentum=0.8,
    dropout=0.3,
    #imizer__nesterov=True,
    )

CamelRunning---NNclasffier_in_cuda.......
CamelRunning---NNclasffier_in_cuda.......Finished

#Accuracy plot, the overall clustering accuracy is ~85%

[52]:

ax=scm.CamelPrefiltering.AccuracyPlot( nnModel=net, accCutoff=0.85,
                 Xlow=-1, Ylow=0.0, Yhigh=1,
               )
plt.savefig("upload_%s_CurvePlot_learningAccuracy.pdf"%today,bbox_inches='tight')

../_images/Tutorials_scCAMEL_SWAPLINEv2_Tutorials_scCAMEL-SWAPLINEv1_LiverMacrophage_22_0.png

[53]:

net=scm.CamelPrefiltering.NNclassifer(
   datax=scref,
    epochNum=35,
    learningRate=0.03,
    verbose=0,
    optimizerMmentum=0.8,
    dropout=0.3,
    #imizer__nesterov=True,
    )

CamelRunning---NNclasffier_in_cuda.......
CamelRunning---NNclasffier_in_cuda.......Finished

[54]:

scref

[54]:

AnnData object with n_obs × n_vars = 474 × 21397
    obs: 'cellID', 'Cluster', 'mtrain_index'
    var: 'Filter1', 'MVgene', 'RefGeneList'
    uns: 'rank_genes_groups', 'train_set_gene', 'mclasses_names'
    obsm: 'train_set_values'

[56]:

#if color is not defined
scref=scm.CamelSwapline.addcolor(datax=scref,clustername="Cluster", colorcode="color")

[57]:

scref

[57]:

AnnData object with n_obs × n_vars = 474 × 21397
    obs: 'cellID', 'Cluster', 'mtrain_index', 'color'
    var: 'Filter1', 'MVgene', 'RefGeneList'
    uns: 'rank_genes_groups', 'train_set_gene', 'mclasses_names', 'refcolor_dict'
    obsm: 'train_set_values'

[58]:

set(scref.obs["Cluster"])

[58]:

{'LM1', 'LM2-C1', 'LM2-C2', 'LM3', 'LM4'}

[ ]:

LM1: #ABD9E9
LM2-C1 (use name LM2): #2C7BB6
LM2-C2 (use name cDC2): purple
LM3: #FDAE61
LM4: #D7191C

[59]:

clist=[]
for item in scref.obs["Cluster"]:
    if item=="LM1":
        clist.append("#ABD9E9")
    elif item=="LM2-C2":
        clist.append("#2C7BB6")
    elif item=="LM2-C1":
        clist.append("purple")
    elif item=="LM3":
        clist.append("#FDAE61")
    elif item=="LM4":
        clist.append("#D7191C")

[60]:

scref.obs["color"]=clist

[61]:

scref.uns['refcolor_dict']={'LM2-C1': [44, 123, 182],
 'LM2-C2': [143, 0, 255],
 "LM3": [253,174,97],
                           "LM4": [215,25,28],
 'LM1': [171, 217, 233],
                           }

[62]:

scref.uns["mwanted_order"] =list(sort(list(set(scref.obs["Cluster"]))))

[64]:

scref=scm.CamelSwapline.prediction(datax=scref, mcolor_dict=scref.uns["refcolor_dict"] ,net=net,learninggroup="train", radarplot=True,fontsizeValue=18,
                       ncolnm=3, bbValue=(1.2, 1.05)  )
plt.savefig("upload_%s_RadarPlot_Merged_cluster.pdf"%today,bbox_inches='tight')

../_images/Tutorials_scCAMEL_SWAPLINEv2_Tutorials_scCAMEL-SWAPLINEv1_LiverMacrophage_33_0.png

[65]:

scref

[65]:

AnnData object with n_obs × n_vars = 474 × 21397
    obs: 'cellID', 'Cluster', 'mtrain_index', 'color'
    var: 'Filter1', 'MVgene', 'RefGeneList'
    uns: 'rank_genes_groups', 'train_set_gene', 'mclasses_names', 'refcolor_dict', 'mwanted_order', 'Celltype_Score_RefCellType', 'Celltype_OrderNumber'
    obsm: 'train_set_values', 'Celltype_Score', 'CelltypeScoreCoordinates'

[70]:

#work_dir=""
#filename="%s_Ref%s_MergeCluster_35epch.h5ad"%("LiverMacrophageNNlearned",today)

[71]:

os.path.join(work_dir,filename)

[71]:

'/Dropbox/data/proj/PE_HYZ/PublicDataSet/Liver_Macrophage/LiverMacrophageNNlearned_Ref2022-12-21_MergeCluster_35epch.h5ad'

[72]:

scm.CamelSwapline.writedata(adatax=scref,filename=filename,filepath=work_dir)

[196]:

scref.obs.to_csv("Metatable.csv",sep=",")