Bowling et al., 2020 Datasets (CARLIN)

An Engineered CRISPR-Cas9 Mouse Line for Simultaneous Readout of Lineage Histories and Gene Expression Profiles in Single Cells

[1]:

import scanpy as sc
import numpy as np
import operator
import pandas as pd
import torch.nn.functional as F
import torch.nn as nn
import scanpy as sc
import matplotlib.pyplot as plt
import torch
import os
#setPltLinewidth(0)
# os.environ["CUDA_VISIBLE_DEVICES"] = "1,2,6,7"

from torch.nn import DataParallel
import os
import anndata as ad
import cospar as cs
import scanpy as sc
def createFig(figsize=(8, 4)):
    fig,ax=plt.subplots()
    ax.spines['right'].set_color('none')
    ax.spines['top'].set_color('none')
    #ax.spines['bottom'].set_color('none')
    #ax.spines['left'].set_color('none')
    for line in ax.yaxis.get_ticklines():
        line.set_markersize(5)
        line.set_color("#585958")
        line.set_markeredgewidth(0.5)
    for line in ax.xaxis.get_ticklines():
        line.set_markersize(5)
        line.set_markeredgewidth(0.5)
        line.set_color("#585958")
    ax.set_xbound(0,10)
    ax.set_ybound(0,10)
    fig.set_size_inches(figsize)
    return fig,ax
def setPltLinewidth(linewidth:float):
    mpl.rcParams['axes.linewidth'] = linewidth
import matplotlib as mpl
fig,ax=createFig()
fig.set_size_inches(5,5)
mpl.rcParams['pdf.fonttype'] = 42
mpl.rcParams['ps.fonttype'] = 42
setPltLinewidth(1)
plt.rcParams['figure.dpi'] = 300
plt.rcParams['savefig.dpi'] = 300
plt.rcParams['font.size'] = 14
plt.rcParams['axes.linewidth'] = 1

[24]:

from math import sqrt

import torch
import torch.nn as nn
num_relations=3
len_geneExp=1646
len_embedding=256
learning_rate=0.000123
num_epoch=10
batchsize=512
class SELayer(nn.Module):
    def __init__(self, channel, reduction=16):
        super(SELayer, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool1d(1)
        self.fc = nn.Sequential(
            nn.Linear(channel, channel // reduction, bias=False),
            nn.LeakyReLU(inplace=False),
            nn.Linear(channel // reduction, channel, bias=False),
            nn.Sigmoid()
        )

    def forward(self, x):
        b, c, _ = x.size()
        #print('channel:',c)
        #print('size:',x.shape)
        y = self.avg_pool(x).view(b, c)
        y = self.fc(y).view(b, c, 1)
        #print('y_size:',y.shape)
        return x * y.expand_as(x)
class MutiheadAttention(nn.Module):
    def __init__(self, input_dim, dim_k, dim_v,num_heads):
        super(MutiheadAttention, self).__init__()
        self.dim_q = dim_k
        self.dim_k = dim_k
        self.dim_v = dim_v
        self.num_units=dim_k
        self.num_heads=num_heads
        self.linear_q = nn.Linear(input_dim, dim_k, bias=False)
        self.linear_k = nn.Linear(input_dim, dim_k, bias=False)
        self.linear_v = nn.Linear(input_dim, dim_v, bias=False)
        self._norm_fact = 1 / sqrt(dim_k)

    def forward(self, x):
        # x: batch_size, seq_len, input_dim
        q = self.linear_q(x)  # batch_size, seq_len, dim_k
        k = self.linear_k(x)  # batch_size, seq_len, dim_k
        v = self.linear_v(x)  # batch_size, seq_len, dim_v
        split_size = self.num_units // self.num_heads
        q = torch.stack(torch.split(q, split_size, dim=2), dim=0)  # [h, N, T_q, num_units/h]
        k = torch.stack(torch.split(k, split_size, dim=2), dim=0)  # [h, N, T_k, num_units/h]
        v = torch.stack(torch.split(v, split_size, dim=2), dim=0)  # [h, N, T_k, num_units/h]

        scores = torch.matmul(q, k.transpose(2, 3))
        scores = scores / (self.dim_k ** 0.5)

        scores = F.softmax(scores, dim=3)
        ## out = score * V
        out = torch.matmul(scores, v)  # [h, N, T_q, num_units/h]
        out = torch.cat(torch.split(out, 1, dim=0), dim=3).squeeze(0)

        return out

class ResidualBlock(torch.nn.Module):
    def __init__(self,channels):
        super(ResidualBlock,self).__init__()
        self.channels = channels

        self.conv1 = nn.Conv1d(channels,channels,kernel_size=3,padding=1)
        self.conv2 = nn.Conv1d(channels,channels,kernel_size=3,padding=1)
        self.se=SELayer(channels,16)
    def forward(self, x):
        y = F.relu(self.conv1(x))
        y = self.conv2(y)
        y=self.se(y)
        return F.relu(x+y)

class CNN(nn.Module):
    def __init__(self):
        nn.Module.__init__(self)
        self.att=MutiheadAttention(len_embedding*2,512,512,64)
        self.layernorm=nn.LayerNorm(512)
        self.conv1 = nn.Conv1d(1, 32, 4)
        self.relu1=nn.LeakyReLU(0.2, inplace=True)
        self.rblock1 = ResidualBlock(32)
        self.conv2 = nn.Conv1d(32,64, 4)
        self.batchn1=nn.BatchNorm1d(64)
        self.relu2= nn.LeakyReLU(0.2, inplace=True)
        self.rblock2 = ResidualBlock(64)
        self.conv3=nn.Conv1d(64,128,4)
        self.batchn2=nn.BatchNorm1d(128)
        self.relu3= nn.LeakyReLU(0.2, inplace=True)
        self.rblock3 = ResidualBlock(128)
        self.conv4=nn.Conv1d(128,256,4)
        self.batchn3=nn.BatchNorm1d(256)
        self.relu4= nn.LeakyReLU(0.2, inplace=True)
        self.rblock4 = ResidualBlock(256)
        self.dropout=nn.Dropout()
        self.fc1 = nn.Linear(7424, num_relations)

    def forward(self, x):
        x=self.att(x)+x
        x=self.layernorm(x)
        x = self.conv1(x)
        x = self.relu1(x)
        x = F.max_pool1d(x, 2)
        x=self.rblock1(x)
        x = self.conv2(x)
        x=self.batchn1(x)
        x=self.relu2(x)
        x = F.max_pool1d(x, 2)
        x=self.rblock2(x)

        x = self.conv3(x)
        x=self.batchn2(x)
        x=self.relu3(x)
        x = F.max_pool1d(x, 2)
        x=self.rblock3(x)

        x = self.conv4(x)
        x=self.batchn3(x)
        x=self.relu4(x)
        x = F.max_pool1d(x, 2)
        x=self.rblock4(x)
        x = x.view(x.size()[0], -1)
        x=self.dropout(x)
        x=self.fc1(x)
        return x
geneEnc=nn.Sequential(
    nn.Dropout(),
    nn.Linear(len_geneExp, 100),
    nn.ReLU(),
            nn.Linear(100, 100),
            nn.ReLU(),
     nn.Linear(100, 100),
            nn.ReLU(),
    nn.Linear(100, 100),
            nn.ReLU(),
    nn.Linear(100, len_embedding),
)
geneDec = nn.Sequential(
    nn.Linear(len_embedding, len_geneExp),
    #nn.Linear(512, len_geneExp),

)
genemap = nn.Sequential(
    #nn.Dropout(),
     nn.Linear(len_embedding, 100),
     nn.BatchNorm1d(100),
    nn.ReLU(),
    nn.Linear(100, 100),
   nn.BatchNorm1d(100),
    nn.ReLU(),
    nn.Linear(100, 100),
   nn.BatchNorm1d(100),
    nn.ReLU(),
    nn.Linear(100, 100),
    nn.BatchNorm1d(100),
    nn.ReLU(),
    nn.Linear(100, len_embedding),

)



model=CNN()
device = torch.device("cuda:6")
device2 = torch.device("cuda:6")
path='/home/zhengtuo/songtao/dynamic1.pth'
pretrained_weights = torch.load(path)
model.load_state_dict(pretrained_weights['model'])
geneEnc.load_state_dict(pretrained_weights['geneEnc'])
genemap.load_state_dict(pretrained_weights['genemap'])
geneDec.load_state_dict(pretrained_weights['geneDec'])

model.to(device2)
geneEnc.to(device2)
geneDec.to(device2)
genemap.to(device2)


genemap.eval()
model.eval()
geneEnc.eval()
geneDec.eval()

[24]:

Sequential(
  (0): Linear(in_features=256, out_features=1646, bias=True)
)

[3]:

def ScanpyVolcanoPlot(adata, axis, use_adjusted_p=True, show_label=True, label_fold_change=2, label_log_p=80, add_grid=True, label_size=4, filter_labels=None, kept_labels=None, color1='#F09D30', color2='#3D5FA6', label_excludes=None):
    fig, ax = createFig()
    names = list(map(lambda x: x[axis], adata.uns['rank_genes_groups']['names']))
    log2foldchanges = list(map(lambda x: x[axis], adata.uns['rank_genes_groups']['logfoldchanges']))

    if use_adjusted_p:
        log10adjp = -np.log10(list(map(lambda x: x[axis], adata.uns['rank_genes_groups']['pvals_adj'])))
    else:
        log10adjp = -np.log10(list(map(lambda x: x[axis], adata.uns['rank_genes_groups']['pvals'])))

    # Filter out excluded genes
    if label_excludes is not None:
        include_indices = [i for i, name in enumerate(names) if name not in label_excludes]
        log2foldchanges = [log2foldchanges[i] for i in include_indices]
        log10adjp = [log10adjp[i] for i in include_indices]
        names = [names[i] for i in include_indices]

    ax.scatter(
        x=log2foldchanges,
        y=log10adjp,
        s=12,
        alpha=0.8,
        linewidth=0,
        c=list(map(lambda x: color2 if abs(x[0]) > label_fold_change and x[1] > label_log_p and x[0] < 0
                   else color1 if abs(x[0]) > label_fold_change and x[1] > label_log_p and x[0] > 0
                   else 'gray', zip(log2foldchanges, log10adjp)))
    )

    if add_grid:
        ax.axvline(-label_fold_change, color ="black", alpha = 0.8, lw = 0.8, ls='--')
        ax.axvline(label_fold_change, color ="black", alpha = 0.8, lw = 0.8, ls='--')
        ax.axhline(label_log_p, color ="black", alpha = 0.8, lw = 0.8, ls='--')
        ax.grid(alpha=0.4)
    if show_label:
        indices = list(map(lambda x: abs(x[0]) > label_fold_change and x[1] > label_log_p, zip(log2foldchanges, log10adjp)))
        for i, j, s in zip(np.array(log2foldchanges)[indices], np.array(log10adjp)[indices], np.array(names)[indices]):
            if label_excludes is not None and s in label_excludes:
                continue
            if filter_labels is not None and s in filter_labels:
                continue
            if kept_labels is not None and s not in kept_labels:
                continue
            ax.text(i, j, s=s, size=label_size)

    return fig, ax

[34]:

adata=cs.hf.read('/home/zhengtuo/songtao/9.20_carlin_withprob.h5ad')

[35]:

adata.obs['preference_HSC'].value_counts()

[35]:

Ery    207
My     179
Name: preference_HSC, dtype: int64

[32]:

#bowling_adata_=cs.hf.read("/home/zhengtuo/songtao/dynamic.h5ad")

[37]:

dic1={'Mo':'My','Neu':'My','Mk':'Ery','HSC':'HSC'}
adata.obs['reannotation']=adata.obs['reannotation'].map(dic1)
adata.obs['reannotation'].unique()

[37]:

array(['Ery', 'HSC', 'My'], dtype=object)

[38]:

fig,ax=createFig((5,5))
sc.pl.umap(adata, color='reannotation', palette = {
    "Ery": "#2B9E44",
    "My": "#289E9E",
    "HSC": '#E6DCD9'
}, s=32, ax=ax)

/home/zhengtuo/miniconda3/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(

[ ]:

[39]:

fig,ax=createFig((5,5))
sc.pl.umap(adata, color='preference_HSC', palette = {
    "Ery": "#2B9E44",
    "My": "#289E9E",
    "HSC": '#E6DCD9'
}, s=32, ax=ax)

/home/zhengtuo/miniconda3/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(

[418]:

import sklearn
sklearn.metrics.silhouette_score(
    adata[adata.obs['preference_HSC'].isin(['My','Ery'])].obsm["X_umap"],
    adata[adata.obs['preference_HSC'].isin(['My','Ery'])].obs['preference_HSC']
)

[418]:

0.3386568

[478]:

fig,ax=createFig((5,5))
sc.pl.umap(adata, color='fate_pred', palette = {
    "Ery": "#2B9E44",
    "My": "#289E9E",
    "HSC": '#E6DCD9',
    "-": '#E6DCD9',
}, s=32, ax=ax)
ax.figure.savefig('/home/zhengtuo/songtao/DestinyNet/carlin_figure/carlin_fate_pred.pdf', bbox_inches='tight')

/home/zhengtuo/miniconda3/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(

[419]:

def compute_velocity_on_grid(
    X_emb,
    M_emb,
    density=None,
    smooth=None,
    n_neighbors=None,
    min_mass=None,
    autoscale=True,
    adjust_for_stream=False,
    cutoff_perc=None,
):
    # remove invalid cells
    idx_valid = np.isfinite(X_emb.sum(1) + M_emb.sum(1))
    X_emb = X_emb[idx_valid]
    M_emb = M_emb[idx_valid]

    # prepare grid
    n_obs, n_dim = X_emb.shape
    density = 0.5 if density is None else density
    smooth = 0.5 if smooth is None else smooth

    grs = []
    for dim_i in range(n_dim):
        m, M = np.min(X_emb[:, dim_i]), np.max(X_emb[:, dim_i])
        m = m - 0.01 * np.abs(M - m)
        M = M + 0.01 * np.abs(M - m)
        gr = np.linspace(m, M, int(50 * density))
        grs.append(gr)

    meshes_tuple = np.meshgrid(*grs)
    X_grid = np.vstack([i.flat for i in meshes_tuple]).T

    # estimate grid velocities
    if n_neighbors is None:
        n_neighbors = int(n_obs / 50)
    nn = NearestNeighbors(n_neighbors=n_neighbors, n_jobs=-1)
    nn.fit(X_emb)
    dists, neighs = nn.kneighbors(X_grid)

    scale = np.mean([(g[1] - g[0]) for g in grs]) * smooth
    weight = normal.pdf(x=dists, scale=scale)
    p_mass = weight.sum(1)

    V_grid = (M_emb[neighs] * weight[:, :, None]).sum(1)
    V_grid /= np.maximum(1, p_mass)[:, None]
    if min_mass is None:
        min_mass = 1

    if adjust_for_stream:
        X_grid = np.stack([np.unique(X_grid[:, 0]), np.unique(X_grid[:, 1])])
        ns = int(np.sqrt(len(V_grid[:, 0])))
        V_grid = V_grid.T.reshape(2, ns, ns)

        mass = np.sqrt((V_grid ** 2).sum(0))
        min_mass = 10 ** (min_mass - 6)  # default min_mass = 1e-5
        min_mass = np.clip(min_mass, None, np.max(mass) * 0.9)
        cutoff = mass.reshape(V_grid[0].shape) < min_mass

        if cutoff_perc is None:
            cutoff_perc = 5
        length = np.sum(np.mean(np.abs(M_emb[neighs]), axis=1), axis=1).T
        length = length.reshape(ns, ns)
        cutoff |= length < np.percentile(length, cutoff_perc)

        V_grid[0][cutoff] = np.nan
    else:
        min_mass *= np.percentile(p_mass, 99) / 100
        X_grid, V_grid = X_grid[p_mass > min_mass], V_grid[p_mass > min_mass]

        if autoscale:
            V_grid /= 3 * quiver_autoscale(X_grid, V_grid)

    return X_grid, V_grid

def quiver_autoscale(X_emb, V_emb):
    import matplotlib.pyplot as pl

    scale_factor = np.abs(X_emb).max()  # just so that it handles very large values
    fig, ax = pl.subplots()
    Q = ax.quiver(
        X_emb[:, 0] / scale_factor,
        X_emb[:, 1] / scale_factor,
        V_emb[:, 0],
        V_emb[:, 1],
        angles="xy",
        scale_units="xy",
        scale=None,
    )
    Q._init()
    fig.clf()
    pl.close(fig)
    return Q.scale / scale_factor

[420]:

import numpy as np
from scipy.stats import norm as normal
from sklearn.neighbors import NearestNeighbors
import tqdm
adata2=adata


sc.pp.neighbors(adata)

X = adata.obsm['geneEnc'][:,0,:]
M = adata.obsm['genemap'][:,0,:]
N = adata.obsp['connectivities']
E = adata.obsm["X_umap"]
norm = lambda x: (x-min(x))/(max(x)-min(x))
nn = NearestNeighbors(n_neighbors=5)
nn.fit(X)
dE = []
for i in tqdm.trange(N.shape[0]):

    D,I = nn.kneighbors( np.array([M[i]]) )
    dE.append( (E[I][0] - E[i]).mean(0))
dE = np.vstack(dE)
gE, gdE = compute_velocity_on_grid(E,dE,density=2, adjust_for_stream=True,smooth=True)
fig,ax=createFig()
fig.set_size_inches(5,5)
palette = sc.pl._tools.scatterplots._get_palette(adata, 'reannotation')
ax.scatter(E[:,0],E[:,1],s=10, linewidths=0,c=list(map(lambda x: palette[x], adata.obs['reannotation'])))
stream_kwargs = {
        "linewidth": 0.5,
        "density": 3,
        "zorder": 3,
        "color": "black",
        "arrowsize":  1,

        "arrowstyle": "-|>",
        "maxlength":  10,
        "integration_direction":  "both",
}
ax.streamplot(gE[0],gE[1],gdE[0],gdE[1],**stream_kwargs)

100%|███████████████████████████████████████| 612/612 [00:00<00:00, 1271.66it/s]

[420]:

<matplotlib.streamplot.StreamplotSet at 0x7f438e15d340>

[423]:

fig,ax=createFig(figsize=(5, 5))
fig.set_size_inches(5,5)
palette = sc.pl._tools.scatterplots._get_palette(adata, 'fate_pred')
ax.scatter(
    adata[adata.obs['preference_HSC'] == 'HSC'].obsm["X_umap"][:,0],
    adata[adata.obs['preference_HSC'] == 'HSC'].obsm["X_umap"][:,1],
    s = 2, color='#E6DCD9', linewidth=0
)
sc.pl.umap(
    adata[adata.obs['preference_HSC'] != 'HSC'],
    ax=ax,
    color='fate_pred',
    s=30,
    palette = {
    "Ery": "#2B9E44",
    "My": "#289E9E",
    "HSC": '#E6DCD9'
},
    show=False
)
stream_kwargs = {
        "linewidth": 0.5,
        "density": 2,
        "zorder": 3,
        "color": "black",
        "arrowsize":  1,

        "arrowstyle": "-|>",
        "maxlength":  10,
        "integration_direction":  "both",
}

ax.streamplot(gE[0],gE[1],gdE[0],gdE[1],**stream_kwargs)
plt.savefig("/home/zhengtuo/songtao/DestinyNet/carlin_figure/flow.pdf")

/home/zhengtuo/miniconda3/lib/python3.9/site-packages/scanpy/plotting/_utils.py:430: ImplicitModificationWarning: Trying to modify attribute `._uns` of view, initializing view as actual.
  adata.uns[value_to_plot + '_colors'] = colors_list
/home/zhengtuo/miniconda3/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(

[424]:

sc.tl.leiden(adata, resolution=0.3)
adata.obs['fate_pred'].cat.add_categories(['-'], inplace=True)

adata.obs['fate_pred'] = adata.obs['fate_pred'].fillna('-')
sc.pl.umap(adata,color='leiden')

/tmp/ipykernel_97488/2133507794.py:2: FutureWarning: The `inplace` parameter in pandas.Categorical.add_categories is deprecated and will be removed in a future version. Removing unused categories will always return a new Categorical object.
  adata.obs['fate_pred'].cat.add_categories(['-'], inplace=True)
/home/zhengtuo/miniconda3/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(

[426]:

adata.obs['fate_pred'].value_counts()

[426]:

Ery    236
-      226
My     150
Name: fate_pred, dtype: int64

[438]:

import seaborn as sns

xkey = 'geneEnc'
mkey = 'genemap'
X = adata.obsm['geneEnc'][:,0,:]
M = adata.obsm['genemap'][:,0,:]
N = adata.obsp['connectivities']

fig,ax=createFig()
fig.set_size_inches(6, 5)
palette={
 'Ery': '#2A9E27',
 'Lymphoid': '#E43E96',
 'Mast': '#B5BD61',
 'My': '#279E9E',
 'Mo': '#50C3C6',
 'Neu': '#FAA339',
 'ME': '#279E8F',
 'Ly': '#E43E96',
 'HSC': '#E6DCD9',
    '-':'#E6DCD9'
}
ax.scatter(E[:,0],E[:,1],s=1, linewidths=0,c=list(map(lambda x: palette[x], adata.obs['fate_pred'])))
# ax.scatter(E[:,0],E[:,1],s=1, linewidths=0,c=list(map(lambda x: sc.pl._tools.scatterplots._get_palette(biddy_adata, 'leiden')[x], biddy_adata.obs['leiden'])))
tmp = adata.obs

def NormalizeData(data):
    return (data - np.min(data)) / (np.max(data) - np.min(data))
# sc.pl.umap(weinreb_adata, ax=ax, color='Gata2')

i=np.random.choice(list(map(lambda z: z[0],
                            filter(lambda x: x[1] == '0',
                                   enumerate(adata.obs['leiden'])))), 1)
i
from sklearn.neighbors import NearestNeighbors
nn = NearestNeighbors(n_neighbors=40)
nn.fit(X)
D,I = nn.kneighbors(M[i])
e = E[I][0]


E = adata.obsm["X_umap"]

ax.scatter(
    adata.obsm["X_umap"][:,0],
    adata.obsm["X_umap"][:,1],
    color='#F1F1F1',
    s=2
)

stream_kwargs = {
        "linewidth": 0.5,
        "density": 1.8,
        "zorder": 3,
        "color": "black",
        "arrowsize":  1,
        "arrowstyle": "-|>",
        "maxlength":  10,
        "integration_direction":  "both",
}
ax.streamplot(gE[0],gE[1],gdE[0],gdE[1],**stream_kwargs)

ax.scatter(E[i,0],E[i,1],color='red',s=20,marker='*')
# sns.kdeplot(e[:,0],e[:,1],ax=ax, fill=True, alpha=0.5, cmap='Reds',cbar=True)
sns.kdeplot(x=e[:,0], y=e[:,1], ax=ax, fill=True, alpha=0.5, cmap='Reds', cbar=True)

[438]:

<Axes: >

[439]:

from matplotlib.backends.backend_pdf import PdfPages
with PdfPages('/home/zhengtuo/songtao/DestinyNet/carlin_figure/fateflow_hotpot4.pdf') as pdf:
    pdf.savefig(fig, bbox_inches='tight')

[168]:

top_genes

[168]:

Index(['Mllt3', 'Hnrnpa1', 'Eif5a', 'Scaper', 'Tmem71', 'Casc3', 'Hacd4',
       'Prnp', 'Neurl3', 'Zscan2', 'Nkx2-3', 'Psmb4', 'Jun', 'Ssb', 'Psmb8',
       'Ifi27', 'Flt3', 'Ctsl', 'H2-K1', 'Pde4b'],
      dtype='object')

[61]:

fig,ax=createFig(figsize=(4,4))
sc.pl.umap(adata,color='Ccl9',ax=ax,s=50,cmap='Reds',vmin=0)#Apoe,Ifitm1,Mpl,Dntt

[440]:

#adata2.obsm['X_pca']=adata2.obsm['X_pca'].cpu().numpy()
adata3=adata2[(adata2.obs['reannotation']=='HSC')&(adata2.obs['fate_pred'].isin(['My','Ery']))]
from sklearn.neighbors import KNeighborsClassifier
plt.rcParams['font.size'] = 14
fig,ax=createFig(figsize=(3,3))
sc.pl.umap(adata3,color=['fate_pred'],ax=ax,s=10)
sc.tl.rank_genes_groups(adata3, groupby='fate_pred', method='t-test_overestim_var')

sc.pl.heatmap(
    adata3[list(map(lambda x: type(x) == str, adata3.obs['fate_pred']))],
    pd.DataFrame(adata3.uns['rank_genes_groups']['names']).head(16).to_numpy().flatten(),
    groupby='fate_pred', cmap='Reds'

)

/home/zhengtuo/miniconda3/lib/python3.9/site-packages/scanpy/plotting/_utils.py:430: ImplicitModificationWarning: Trying to modify attribute `._uns` of view, initializing view as actual.
  adata.uns[value_to_plot + '_colors'] = colors_list
/home/zhengtuo/miniconda3/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(

[ ]:

"Ery": "#2B9E44",
    "My": "#289E9E",

[442]:

plt.rcParams['font.size'] = 8
fig,ax=ScanpyVolcanoPlot(
    adata3,
    1, label_log_p=1.3,
    show_label=True,
    label_fold_change=1,
    label_size=2,
    add_grid=False,
    color1='#289E9E',
    color2='#2B9E44',
)
ax.set_xbound(-7,7)
fig.set_size_inches(4,4)
fig.savefig("/home/zhengtuo/songtao/DestinyNet/carlin_figure/carlin_volcano.pdf", dpi=1000)

[63]:

plt.rcParams['font.size'] = 8
fig,ax=ScanpyVolcanoPlot(
    adata3,
    1, label_log_p=1.3,
    show_label=False,
    label_fold_change=1,
    label_size=2,
    add_grid=False,
    color1='#289E9E',
    color2='#2B9E44',
)
ax.set_xbound(-7,7)
fig.set_size_inches(4,4)
fig.savefig("/home/zhengtuo/songtao/DestinyNet/carlin_figure/carlin_volcano_nowords.pdf", dpi=1000)

[388]:

adata3_GT=adata[adata.obs['preference_HSC'].isin(['My','Ery'])]
sc.tl.rank_genes_groups(adata3_GT, groupby='preference_HSC', method='t-test_overestim_var')

/home/zhengtuo/miniconda3/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:580: ImplicitModificationWarning: Trying to modify attribute `._uns` of view, initializing view as actual.
  adata.uns[key_added] = {}

[64]:

plt.rcParams['font.size'] = 8
fig,ax=ScanpyVolcanoPlot(
    adata3_GT,
    1, label_log_p=1.3,
    show_label=True,
    label_fold_change=1,
    label_size=4,
    add_grid=False,
    color1='#289E9E',
    color2='#2B9E44',
)
ax.set_xbound(-7,7)
fig.set_size_inches(4,4)
fig.savefig("/home/zhengtuo/songtao/DestinyNet/carlin_figure/carlin_volcano_GT.pdf", dpi=1000)

[65]:

plt.rcParams['font.size'] = 8
fig,ax=ScanpyVolcanoPlot(
    adata3_GT,
    1, label_log_p=1.3,
    show_label=False,
    label_fold_change=1,
    label_size=4,
    add_grid=False,
    color1='#289E9E',
    color2='#2B9E44',
)
ax.set_xbound(-7,7)
fig.set_size_inches(4,4)
fig.savefig("/home/zhengtuo/songtao/DestinyNet/carlin_figure/carlin_volcano_GT_nowords.pdf", dpi=1000)

[378]:

adata

[378]:

AnnData object with n_obs × n_vars = 1558 × 20863
    obs: 'sample_name', 'leiden', 'reannotation', 'preference', 'preference_HSC', 'preference_MPP', 'index', 'fate_pred', 'prob'
    uns: 'fate_pred_colors', 'neighbors', 'preference_HSC_colors', 'reannotation_colors', 'umap'
    obsm: 'X_emb', 'X_emb_old', 'X_pca', 'X_pca_sct', 'X_umap', 'X_umap_sct', 'geneEnc', 'genemap'
    obsp: 'connectivities', 'distances'

[41]:

import seaborn as sns
import pandas as pd
from torch.utils.data import DataLoader, TensorDataset
from torch.autograd import Variable
import torch.nn as nn

def compute_gradients_for_label(target_label, data, model_encoder, model_decoder):
    mask = data.obs['fate_pred'] == target_label
    filtered_data = data[mask]

    geneex_data = torch.tensor(filtered_data.X.toarray(), dtype=torch.float32).to(device2)

    dataset = TensorDataset(geneex_data)
    dataloader = DataLoader(dataset, batch_size=256, shuffle=False)

    total_gradients = torch.zeros(len_geneExp).to(device2)
    for batch in dataloader:
        geneExp = batch[0]
        geneExp = Variable(geneExp, requires_grad=True)
        output_data = model_encoder(geneExp)
        output_data = model_decoder(output_data)

        loss_function = nn.MSELoss()
        loss = loss_function(output_data, geneExp)

        model_encoder.zero_grad()
        model_decoder.zero_grad()
        loss.backward()

        total_gradients += geneExp.grad.abs().mean(dim=0)

    average_gradients = total_gradients / len(dataloader)
    return average_gradients

labels = ['My', 'Ery']
gradients_per_label = {}

for label in labels:
    gradients_per_label[label] = compute_gradients_for_label(label, adata, geneEnc, geneDec)

[42]:

num_key_genes = 50

key_genes_per_label = {}

for label, gradients in gradients_per_label.items():
    df = pd.DataFrame({
        'gene_ids': adata.var.index.values,
        'importance': gradients.cpu().detach().numpy()
    })

    sorted_df = df.sort_values(by="importance", ascending=False)

    key_genes = sorted_df['gene_ids'].head(num_key_genes).tolist()
    key_genes_per_label[label] = key_genes

for label, genes in key_genes_per_label.items():
    print(f"Key genes for label {label}:")
    print(', '.join(genes))
    print("\n")

Key genes for label My:
H2afy, Ifitm1, Srgn, Plac8, Ctla2a, Lmo2, Rpl31, Ncl, Sox4, Ifitm3, Zfp36l2, Ptprcap, Car2, Calr, Tmsb10, Vim, Jun, Hspe1, Stmn1, Cdk6, Prtn3, Cd34, Adgrl4, Adgrg1, Tubb5, Mpo, Pim1, Txnip, Msi2, Pnrc1, Ldha, Mef2c, Ogt, Samsn1, Akap13, Ier2, Serinc3, Ubc, Pnisr, Fos, Ifi203, Egfl7, Jund, Limd2, Hmgb2, Dapp1, Tspo, Hlf, H2-T23, Serpinb1a


Key genes for label Ery:
H2afy, Ifitm1, Srgn, Lmo2, Ctla2a, Rpl31, Ifitm3, Plac8, Zfp36l2, Sox4, Car2, Ncl, Ptprcap, Calr, Txnip, Prtn3, Vim, Hlf, Tmsb10, Adgrg1, Pim1, Pnrc1, Adgrl4, Stmn1, Ogt, Pnisr, Cd34, Ifi203, Msi2, Hspe1, Serinc3, Gimap1, Ptpre, Dapp1, Cdk6, Akap13, Jun, Tubb5, Aes, H2-Q7, Angpt1, Limd2, Gimap6, Cd63, Ubc, Foxp1, Ltb, Tspo, H2-T23, Dcun1d1

[56]:

gradient_data = {}
for label, gradients in gradients_per_label.items():
    gradient_data[label] = gradients.cpu().detach().numpy()

df_gradients_all = pd.DataFrame(gradient_data, index=adata.var.index.values)

def normalize(column):
    """Normalize a pandas column to range between 0 and 1"""
    min_val = column.min()
    max_val = column.max()
    return (column - min_val) / (max_val - min_val)

# Normalize gradients for each label
for label in labels:
    df_gradients_all[label] = normalize(df_gradients_all[label])

# Compute difference between normalized gradients for the two labels
df_gradients_all['diff'] = (df_gradients_all[labels[0]] - df_gradients_all[labels[1]])

# Select the top 20 genes with the largest gradient difference
top_genes = df_gradients_all['diff'].abs().nlargest(100).index

# Extract the normalized gradient values for the top genes
df_gradient_top = df_gradients_all.loc[top_genes, labels]

# Plot heatmap
plt.figure(figsize=(12, 8))
sns.heatmap(df_gradient_top, cmap="coolwarm", annot=True, cbar_kws={'label': 'Normalized Gradient Difference'})
plt.title("Normalized Gradient Difference of Top 20 Differentially Expressed Genes across Labels", fontsize=15)
plt.ylabel('Top Genes', fontsize=12)
plt.xlabel('Labels', fontsize=12)
plt.tight_layout()
plt.show()

[57]:

import matplotlib.pyplot as plt
df_gradients_all_normalized = df_gradients_all.copy()
for label in labels:
    df_gradients_all_normalized[label] = (df_gradients_all[label] - df_gradients_all[label].min()) / (df_gradients_all[label].max() - df_gradients_all[label].min())
num=50
df_gradients_all_normalized[labels[0]] = -df_gradients_all_normalized[labels[0]]

df_gradients_all_normalized['bias'] = df_gradients_all_normalized[labels[1]] + df_gradients_all_normalized[labels[0]]

sorted_df_right_bias = df_gradients_all_normalized.sort_values(by='bias', ascending=False).head(num)
sorted_df_right_bias =sorted_df_right_bias.sort_values(by='bias', ascending=True)

sorted_df_left_bias = df_gradients_all_normalized.sort_values(by='bias').head(num)
sorted_df_left_bias =sorted_df_left_bias.sort_values(by='bias', ascending=False)
top_genes_bias = pd.concat([sorted_df_left_bias, sorted_df_right_bias])
#top_genes_bias = top_genes_bias.sort_values(by='bias', key=abs, ascending=False)

fig, ax = plt.subplots(figsize=(12, 18))

colors = ['blue', 'red']

for gene in top_genes_bias.index:
    for i, label in enumerate(labels):
        x_value = top_genes_bias.loc[gene, label]
        y_value = list(top_genes_bias.index).index(gene)
        ax.plot(x_value, y_value, marker='o', color=colors[i])
        if i == 0:
            x_values = [top_genes_bias.loc[gene, labels[0]], top_genes_bias.loc[gene, labels[1]]]
            y_values = [y_value, y_value]
            ax.plot(x_values, y_values, linestyle='-', color='grey')

ax.axvline(0, color='black', linestyle='--')

divider_position = num
ax.axhline(divider_position - 0.5, color='green', linestyle='--')

ax.set_yticks(range(len(top_genes_bias.index)))
ax.set_yticklabels(top_genes_bias.index)
ax.set_title('Top 20 Genes with Highest Gradient Bias')
ax.set_xlabel('Normalized Gradient Value')
ax.legend(labels, loc='upper right')

plt.tight_layout()
plt.show()
fig.savefig("/home/zhengtuo/songtao/DestinyNet/carlin_figure/carlin_lolipop_plot_top50.pdf", dpi=1000)

[25]:

dic1={'Mo':'My','Neu':'My','Mk':'Ery','HSC':'HSC','Ery':'Ery'}

adata.obs['reannotation']=adata.obs['reannotation'].map(dic1)
adata.obs['reannotation'].value_counts()

[25]:

HSC    386
Ery    119
My     107
Name: reannotation, dtype: int64

[59]:

import pandas as pd

df = pd.read_csv('/home/zhengtuo/songtao/tf.csv')

filtered_df = df[df['Symbol'].isin(top_genes)]['Symbol']

print(filtered_df.values)

['Hlf' 'Egr1' 'Zfp36l2' 'Zfp287' 'Zbtb38' 'Pura' 'Pou2f2' 'Nr4a1' 'Klf6'
 'Klf2' 'Junb' 'Jun' 'Irf8' 'Id1' 'Fosb' 'Fos' 'Cebpb' 'Bcl11a']

[62]:

import anndata
# df_expression_all['diff'] = df_expression_all[labels[0]] - df_expression_all[labels[1]]
top_genes =  top_genes_bias[::-1].index

sc.pl.dotplot(adata[adata.obs['fate_pred']!='-'],
              var_names=filtered_df.values,
              groupby='fate_pred',
              figsize=(8, 3),
              dot_max=0.5,

              color_map='Reds',
              title="Top Differentially Expressed Genes across Cell Fate",
              vmin=0.1,
              vmax=2,
              save='carlin_dotplot_top50_tf.pdf')

WARNING: saving figure to file figures/dotplot_carlin_dotplot_top50_tf.pdf

/home/zhengtuo/miniconda3/lib/python3.9/site-packages/scanpy/plotting/_dotplot.py:749: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
  dot_ax.scatter(x, y, **kwds)

[137]:

x_indices = list(map(lambda x: x[0], sorted(enumerate(_biddy_adata.obs['dic_fate_prob']), key=lambda x: x[1])))
var_names = ['Apoa1','S100a13','Gsto1','Msln','Cd9','Timp3','Rbp1','2200002D01Rik','Ccnd1','S100a6','Ly6a','Sorbs2','Anxa3','Plac8','Spint2','Fabp5','Fbn2','Thbs1','Fstl1','Igf2','Rcn3','Serpine2','Col5a2','Aebp1','Cdh11','Itm2a','Ifitm2','Igfbp4','Col6a1','Ogn','Cxcl12','Cryab','Bgn','Marcks','Cxcl15','Fzd1','Gas1','Spp1','Svep1','Loxl1','Mmp2','Ifitm3','Col1a2','Tcf4','Sparc','Peg3','Col1a1','Igfbp2','Col3a1','Il6st']
y_indices = list(map(lambda x: list(_biddy_adata.var.index).index(x), var_names))

expr = _biddy_adata.raw.to_adata().X[x_indices,:,][:,y_indices]
expr = pd.DataFrame(expr, columns=var_names)
cmap = make_colormap(['#E4B748','#F7F7F7','#E5499B'])
fig=sns.clustermap(
    expr,
    row_cluster=False,
    col_cluster=False,
    z_score=1,

    cmap='seismic',
    col_colors=['#E5499B'] * 16 + ['#E4B748'] * (len(var_names) - 16),
    row_colors=list(map(lambda x: cmap(x), _biddy_adata.obs['dic_fate_prob'][x_indices])),
    center=0
)
fig.savefig("/Users/snow/Downloads/20230914_LT_Pseudotime_Biddy_prob_gene_exp/heatmap.pdf")
fig.savefig("/Users/snow/Downloads/20230914_LT_Pseudotime_Biddy_prob_gene_exp/heatmap.png")

[137]:

'#289E9E'

[409]:

adata2.obs['prob']

[409]:

AAACGCTTCTTCCTAA-1                   None
AAAGGATGTCAGACTT-1    0.07216494530439377
AAAGGATGTGGGTATG-1                   None
AACAAAGAGTAGGATT-1                   None
AACAACCTCATGCTAG-1                   None
                             ...
TTTGATCTCGGCATCG-1    0.34117648005485535
TTTGGAGGTACCCGCA-1     0.4193548262119293
TTTGGAGTCGGCATCG-1                   None
TTTGGTTCAATGAGCG-1                   None
TTTGTTGTCGGTAAGG-1                   None
Name: prob, Length: 1558, dtype: category
Categories (503, object): ['0.0', '0.5', '0.25', '0.75', ..., '0.061093248426914215', '0.0062500000931322575', '1.0', 'None']

[17]:

df_gradients_all['abs_diff'] = df_gradients_all['diff'].abs()
sorted_df = df_gradients_all.sort_values(by=['abs_diff', 'diff'], ascending=[False, True])
top_20_genes = sorted_df.head(20)

top_20_genes_sorted = top_20_genes.sort_values(by=['diff', 'abs_diff'], ascending=[False, False])
var_names = top_20_genes_sorted.index.tolist()

top_20_genes_sorted

[17]:

	My	Ery	diff	abs_diff
Fos	0.360704	0.223209	0.137495	0.137495
Klf2	0.145687	0.051841	0.093846	0.093846
Mpo	0.401033	0.318277	0.082756	0.082756
Egr1	0.244896	0.163873	0.081022	0.081022
Gimap1	0.329236	0.407452	-0.078215	0.078215
Ifi203	0.357288	0.436235	-0.078947	0.078947
Slamf1	0.033929	0.113425	-0.079496	0.079496
Hbb-bt	0.138137	0.218351	-0.080214	0.080214
Gcnt2	0.266860	0.347409	-0.080550	0.080550
Ltb	0.293661	0.375018	-0.081357	0.081357
Glul	0.226999	0.310997	-0.083998	0.083998
Lmo2	0.656750	0.742076	-0.085326	0.085326
Ifitm3	0.580829	0.667090	-0.086261	0.086261
Pnisr	0.360770	0.448239	-0.087470	0.087470
H2-Q7	0.303556	0.394600	-0.091044	0.091044
Zfp36l2	0.552694	0.644673	-0.091979	0.091979
Angpt1	0.299832	0.393674	-0.093841	0.093841
Txnip	0.385906	0.488796	-0.102890	0.102890
Car2	0.519420	0.639244	-0.119823	0.119823
Hlf	0.341662	0.478859	-0.137197	0.137197

[20]:

#adata.obs['prob']=adata.obs['prob'].astype(float)
adata2 = adata[(adata.obs['prob'])!='None']
adata2.obs['prob']=adata2.obs['prob'].astype(float)
x_indices = list(map(lambda x: x[0], sorted(enumerate(adata2.obs['prob']), key=lambda x: x[1])))
var_names = top_genes_bias.index
y_indices = list(map(lambda x: list(adata2.var.index).index(x), var_names))

expr = adata2.X[x_indices,:,][:,y_indices].toarray()
expr = pd.DataFrame(expr, columns=var_names)
cmap = make_colormap(['#289E9E','#F7F7F7','#2B9E44'])

fig=sns.clustermap(
    expr,
    row_cluster=False,
    col_cluster=False,
    z_score=1,

#     vmin=-2,
#     vmax=2,
    cmap='seismic',
    col_colors=['#289E9E'] * 10 + ['#2B9E44'] * (len(var_names) - 10),
    row_colors = list(map(lambda x: cmap(float(x) ), adata2.obs['prob'][x_indices])),
    center=0
)
#fig.savefig('/home/zhengtuo/songtao/DestinyNet/carlin_figure/heatmap.pdf')
# fig.savefig("/Users/snow/Downloads/20230914_LT_Pseudotime_Biddy_prob_gene_exp/heatmap.png")

/tmp/ipykernel_93469/2197485064.py:3: ImplicitModificationWarning: Trying to modify attribute `.obs` of view, initializing view as actual.
  adata2.obs['prob']=adata2.obs['prob'].astype(float)

[395]:

adata3_GT

[395]:

AnnData object with n_obs × n_vars = 777 × 20863
    obs: 'sample_name', 'leiden', 'reannotation', 'preference', 'preference_HSC', 'preference_MPP', 'index', 'fate_pred', 'prob'
    uns: 'fate_pred_colors', 'neighbors', 'preference_HSC_colors', 'reannotation_colors', 'umap', 'rank_genes_groups'
    obsm: 'X_emb', 'X_emb_old', 'X_pca', 'X_pca_sct', 'X_umap', 'X_umap_sct', 'geneEnc', 'genemap'
    obsp: 'connectivities', 'distances'

[389]:

import pandas as pd

genes_df = pd.DataFrame(adata3_GT.uns['rank_genes_groups']['names'])

GT_10_genes_group_0 = genes_df.iloc[:10, 0]
GT_10_genes_group_1 = genes_df.iloc[:10, 1]

GT_20_genes = GT_10_genes_group_0.append(GT_10_genes_group_1)

genes_index = pd.Index(GT_20_genes)

/tmp/ipykernel_97488/922251399.py:11: FutureWarning: The series.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
  GT_20_genes = GT_10_genes_group_0.append(GT_10_genes_group_1)

[390]:

genes_index

[390]:

Index(['mt-Co3', 'H2-Q7', 'H2-Aa', 'Rps25', 'Rpl30', 'Rpl34', 'Rpl37a',
       'Rpl35a', 'Hist1h4d', 'Rpl37', 'Rplp0', 'Rps3a1', 'Gapdh', 'Hspa8',
       'Cd48', 'Ubald2', 'Gm8797', 'Ccl9', 'Rpl8', 'Ifitm1'],
      dtype='object')

[ ]:

from scipy.stats import zscore

selected_data = adata2[adata2.obs['fate_pred'].isin(['Ery','My'])]

z_scores = zscore(selected_data.X.toarray(), axis=0)

selected_data.X = z_scores
sc.pl.heatmap(
    selected_data,
    top_genes,
    groupby='fate_pred',
    cmap='RdBu_r',
    vmin=-3,
    vmax=3
)

[ ]:

sns.clustermap(
selected_data,
    top_genes,

)

[8]:

import tqdm
from scanpy.neighbors import _compute_connectivities_umap

weinreb_adata_to_plot=adata2.copy()
weinreb_adata_mon =  weinreb_adata_to_plot[
        (np.array(weinreb_adata_to_plot.obs['reannotation'] == 'Ery') |
    np.array(weinreb_adata_to_plot.obs['fate_pred'] == 'Ery') )
    #np.array(biddy_adata.obs['leiden'] != '9')  & np.array(biddy_adata.obs['leiden'] != '13')
]

nn = NearestNeighbors(n_neighbors=20)
nn.fit(weinreb_adata_mon.obsm["geneEnc"][:,0,:])
D,I=nn.kneighbors(weinreb_adata_mon.obsm["geneEnc"][:,0,:])
# weinreb_adata_to_plot.uns['dE']
from scipy import spatial
II,DD=[],[]
pbar = tqdm.tqdm(total=len(weinreb_adata_mon))
idE = dE[(np.array(weinreb_adata_to_plot.obs['reannotation'] == 'Ery') |
    np.array(weinreb_adata_to_plot.obs['fate_pred'] == 'Ery') ) ]

for i,d in zip(I,D):
    index = list(map(lambda x: spatial.distance.cosine(x-weinreb_adata_mon.obsm["X_umap"][i[0]], idE[i[0]] ) > 0, weinreb_adata_mon.obsm["X_umap"][i[1:]]))
    II.append(i[1:][index][:8])
    DD.append(d[1:][index][:8])
    pbar.update(1)
II,DD=np.vstack(II),np.vstack(DD)
import warnings
from scipy.sparse import coo_matrix
II,DD=np.vstack(II),np.vstack(DD)
Dc,Cc=_compute_connectivities_umap(II,DD,weinreb_adata_mon.shape[0],8)
Dc,Cc=_compute_connectivities_umap(I,D,weinreb_adata_mon.shape[0],8)
weinreb_adata_mon.obsp['connectivities']=Cc
weinreb_adata_mon.obsp['distances']=Dc

undiff_cells = weinreb_adata_mon[weinreb_adata_mon.obs['reannotation'] == 'Ery']
min_x_index = np.argmax(undiff_cells.obsm["X_umap"][:, 0])
root_cell_index = undiff_cells.obs.index[min_x_index]
weinreb_adata_mon.uns['iroot'] = np.where(weinreb_adata_mon.obs.index == root_cell_index)[0][0]

sc.tl.diffmap(weinreb_adata_mon)
sc.tl.dpt(weinreb_adata_mon)
weinreb_adata_mon.obs['dpt_pseudotime'] = 1-weinreb_adata_mon.obs['dpt_pseudotime']

[9]:

fig, axes = plt.subplots(4, 1)
fig.set_size_inches(5, 10)
for i, ax in zip(['Apoe', 'Flt3','Mllt3'], axes):
    varindex = list(weinreb_adata_mon.var.index).index(i)
    indices = weinreb_adata_mon[:, varindex].X.toarray().ravel() != 0
    sns.regplot(
        x=weinreb_adata_mon.obs.loc[indices, 'dpt_pseudotime'],
        y=weinreb_adata_mon[indices].X.toarray()[:, varindex].ravel(),
        scatter_kws={
            's': 10,
            'linewidth': 0,
            'color': list(map(lambda x: plt.cm.viridis(x), weinreb_adata_mon.obs.loc[indices, 'dpt_pseudotime']))
        },
        lowess=True,
        ax=ax
    )

[10]:

# sc.pl.umap(weinreb_adata_mon,color='dpt_pseudotime')

[11]:

# sc.pl.umap(weinreb_adata_mon,color='reannotation')

[19]:

from colour import Color
from matplotlib.colors import LinearSegmentedColormap
def make_colormap( colors, show_palette = False ):
    color_ramp = LinearSegmentedColormap.from_list( 'my_list', [ Color( c1 ).rgb for c1 in colors ] )
    if show_palette:
        plt.figure( figsize = (15,3))
        plt.imshow( [list(np.arange(0, len( colors ) , 0.1)) ] , interpolation='nearest', origin='lower', cmap= color_ramp )
        plt.xticks([])
        plt.yticks([])
    return color_ramp

[467]:

fig,ax=createFig()
fig.set_size_inches(5,5)
adata.obs['prob'] =  list(map(lambda x: 0 if not x or x == 'None' else float(x), adata.obs['prob']))
ax.scatter(
    adata[list(map(lambda x: x != 'HSC', adata.obs['reannotation']))].obsm["X_umap"][:,0],
    adata[list(map(lambda x: x != 'HSC', adata.obs['reannotation']))].obsm["X_umap"][:,1],
    s = 10, color='#E6DCD9', linewidth=0
)
sc.pl.umap(
    adata[list(map(lambda x: x == 'HSC', adata.obs['reannotation']))],
    ax=ax,
    color='prob',
    s=30,
    cmap=make_colormap(['#289E9E','#F7F7F7','#2B9E44']),
    show=False
)
ax.streamplot(gE[0],gE[1],gdE[0],gdE[1],**stream_kwargs)
ax.figure.savefig('/home/zhengtuo/songtao/DestinyNet/carlin_figure/flow.pdf', bbox_inches='tight')

[468]:

import matplotlib.pyplot as plt
import numpy as np
from matplotlib.colors import ListedColormap

colormap=make_colormap(['#289E9E','#F7F7F7','#2B9E44'])
data = adata[adata.obs['reannotation']=='HSC'].obs['prob']

plt.figure(figsize=(8, 6))
n, bins, patches = plt.hist(data, bins=30, edgecolor='black', alpha=0.7)

norm = plt.Normalize(data.min(), data.max())

for thisbin, thispatch in zip(bins, patches):
    color = colormap(norm(thisbin))
    thispatch.set_facecolor(color)

plt.title("Distribution of fate_pred_prob")
plt.xlabel("fate_pred_prob")
plt.ylabel("Number of Cells")
plt.savefig("/home/zhengtuo/songtao/DestinyNet/carlin_figure/hisgram_carlin.pdf", bbox_inches='tight')

plt.show()

[81]:

top_genes

[81]:

Index(['Mllt3', 'Hnrnpa1', 'Eif5a', 'Scaper', 'Tmem71', 'Casc3', 'Hacd4',
       'Prnp', 'Neurl3', 'Zscan2', 'Nkx2-3', 'Psmb4', 'Jun', 'Ssb', 'Psmb8',
       'Ifi27', 'Flt3', 'Ctsl', 'H2-K1', 'Pde4b'],
      dtype='object')

[ ]:

[ ]:

[ ]: