Li et al., 2023 Datasets (DARLIN)

[ ]:
import cospar as cs
import scanpy as sc
import pandas as pd
import scipy.sparse as ssp
import seaborn as sns
from matplotlib import pyplot as plt
import numpy as np
import os
adata_orig=cs.hf.read("/home/zhengtuo/songtao/DestinyNet/tissue_adata_refined_20221106_joint.h5ad")
adata_orig
[792]:
# 检查最大值和最小值
max_value = np.max(adata_orig.X)
min_value = np.min(adata_orig.X)

print(f"Maximum value: {max_value}")
print(f"Minimum value: {min_value}")
Maximum value: 821.181884765625
Minimum value: 0.0
[763]:
adata_orig=adata_orig[adata_orig.obs['tissue'].isin(['Skull'])]
adata_orig=adata_orig[adata_orig.obs['state_info']!='Dc']
adata_orig.uns['data_des']=['Skull']

adata_orig.obs['cell_type']=adata_orig.obs['cell_type'].astype(str)
adata_orig.obs['cell_type']=pd.Categorical(adata_orig.obs['cell_type']).set_categories(['HSC', 'MkP','Ery', 'Baso', 'Neu', 'Mon', 'LMPP'], ordered=True)
adata_orig.uns['cell_type_colors']=['#d62728','#ff7f0e', '#1f77b4', '#279e68',  '#aa40fc', '#8c564b', '#e377c2'] #,'#d9d9d9']
adata_orig.obs['state_info']=adata_orig.obs['cell_type'].astype(str)
/home/zhengtuo/miniconda3/envs/jst/lib/python3.9/site-packages/anndata/compat/_overloaded_dict.py:106: ImplicitModificationWarning: Trying to modify attribute `._uns` of view, initializing view as actual.
  self.data[key] = value
[764]:
time_map={'LMPP':'t0', 'HSC':'t0', 'NA':'t1','Baso':'t1', 'MkP':'t1', 'Mon':'t1',  'Ery':'t1','Neu':'t1', 'Dc':'t1'}
adata_orig.obs['time_info']=adata_orig.obs['cell_type'].map(time_map)
cs.pp.initialize_adata_object(adata_orig)
Clones without any cells are removed.
Time points with clonal info: ['t0' 't1']
WARNING: Default ascending order of time points are: ['nan' 't0' 't1']. If not correct, run cs.hf.update_time_ordering for correction.
WARNING: Please make sure that the count matrix adata.X is NOT log-transformed.
[764]:
AnnData object with n_obs × n_vars = 6094 × 21406
    obs: 'library', 'batch', 'n_genes', 'n_counts', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'doublet_score', 'predicted_doublet', 'cell_type', 'state_info', 'time_info', 'tissue', 'state_info_old'
    var: 'n_cells', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable'
    uns: 'cell_type_colors', 'clonal_time_points', 'clone_id', 'data_des', 'library_colors', 'neighbors', 'scrublet', 'time_ordering', 'tissue_colors', 'umap'
    obsm: 'X_clone', 'X_emb', 'X_pca', 'X_umap'
    obsp: 'connectivities', 'distances'
[798]:
df_clone=cs.tl.clone_statistics(adata_orig,joint_variable='state_info');
cs.tl.add_clone_id_for_each_cell(adata_orig)

                  clone_N  clone_fraction
state_info
Baso                   22        0.022893
Baso,Ery                4        0.004162
Baso,Ery,Neu            3        0.003122
Baso,HSC                1        0.001041
Baso,HSC,Neu            1        0.001041
...                   ...             ...
Mon,MkP,HSC             2        0.002081
Mon,MkP,LMPP,HSC        1        0.001041
Mon,Neu                33        0.034339
Neu                   119        0.123829
Neu,Mon,HSC             2        0.002081

[97 rows x 2 columns]
-----------
            cell_N  clonal_cell_fraction
state_info
Baso            97              0.034336
Ery            459              0.162478
HSC            552              0.195398
LMPP           197              0.069735
MkP            215              0.076106
Mon            573              0.202832
Neu            732              0.259115
add information at obs['clone_id']
[799]:
adata3=adata_orig[adata_orig.obs['state_info'].isin(['Neu','Ery','MkP','Mon',"HSC"])].copy()
adata3.obs
/home/zhengtuo/miniconda3/envs/jst/lib/python3.9/site-packages/anndata/_core/anndata.py:121: ImplicitModificationWarning: Transforming to str index.
  warnings.warn("Transforming to str index.", ImplicitModificationWarning)
[799]:
library batch n_genes n_counts n_genes_by_counts total_counts total_counts_mt pct_counts_mt doublet_score predicted_doublet ... state_info_old fate fate_map_transition_map_MkP fate_map_transition_map_Ery fate_map_transition_map_Neu fate_map_transition_map_Mon Fate_bias GT clone clone_id
0 LL837_skull 1 2182 10000.000000 2182 8330.0 566.0 6.794718 0.043403 0 ... Mon 2 NaN NaN NaN NaN NaN nan 0
1 LL837_skull 1 2247 10000.000000 2247 10328.0 128.0 1.239349 0.048237 0 ... Ery 2 NaN NaN NaN NaN NaN nan 0
2 LL837_skull 1 4151 9999.999023 4150 29156.0 420.0 1.440527 0.012388 0 ... Mon 2 NaN NaN NaN NaN NaN nan 0
3 LL837_skull 1 1515 10000.000000 1515 6034.0 146.0 2.419622 0.146524 0 ... Meg 2 NaN NaN NaN NaN NaN nan 0
4 LL837_skull 1 1782 10000.000000 1782 6942.0 108.0 1.555748 0.022658 0 ... Meg 2 NaN NaN NaN NaN NaN nan 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
5480 LL837_skull 1 4018 9999.999023 4018 28538.0 532.0 1.864181 0.019007 0 ... Ery 2 NaN NaN NaN NaN NaN nan 0
5481 LL837_skull 1 3561 10000.000000 3561 22548.0 534.0 2.368281 0.015441 0 ... Mon 2 NaN NaN NaN NaN NaN nan 73 73
5482 LL837_skull 1 4073 9999.999023 4073 29120.0 474.0 1.627747 0.019994 0 ... Ery 2 NaN NaN NaN NaN NaN nan 0
5483 LL837_skull 1 3697 10000.000977 3697 26760.0 632.0 2.361734 0.016280 0 ... Neu 2 NaN NaN NaN NaN NaN nan 412 412
5485 LL837_skull 1 3283 9999.999023 3283 21940.0 274.0 1.248861 0.023810 0 ... Neu 2 NaN NaN NaN NaN NaN nan 344 344

4919 rows × 24 columns

[768]:
np.sum((adata3.obsm['X_clone'].sum(0)>0).A.flatten())

[768]:
878
[745]:
adata_orig.obs['state_info'].value_counts()#1179
[745]:
HSC     2594
Ery     2035
Neu     1510
LMPP    1179
Mon     1087
Meg      618
Dc       390
Baso     334
Lym       43
Name: state_info, dtype: int64
[731]:
adata.obs['state_info'].unique()
[731]:
['Mon', 'Ery', 'MkP', 'HSC', 'Neu', 'LMPP', 'Baso']
Categories (7, object): ['Baso', 'Ery', 'HSC', 'LMPP', 'MkP', 'Mon', 'Neu']
[800]:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.colors import ListedColormap
from colour import Color
from matplotlib.colors import LinearSegmentedColormap
def make_colormap( colors, show_palette = False ):
    color_ramp = LinearSegmentedColormap.from_list( 'my_list', [ Color( c1 ).rgb for c1 in colors ] )
    if show_palette:
        plt.figure( figsize = (15,3))
        plt.imshow( [list(np.arange(0, len( colors ) , 0.1)) ] , interpolation='nearest', origin='lower', cmap= color_ramp )
        plt.xticks([])
        plt.yticks([])
    return color_ramp
[801]:
import scanpy as sc
import numpy as np
import operator
import pandas as pd
import torch.nn.functional as F
import torch.nn as nn
import scanpy as sc
import matplotlib.pyplot as plt
import torch
import os
#setPltLinewidth(0)
# os.environ["CUDA_VISIBLE_DEVICES"] = "1,2,6,7"

from torch.nn import DataParallel
import os
import anndata as ad
import cospar as cs
import scanpy as sc
def createFig(figsize=(8, 4)):
    fig,ax=plt.subplots()
    ax.spines['right'].set_color('none')
    ax.spines['top'].set_color('none')
    #ax.spines['bottom'].set_color('none')
    #ax.spines['left'].set_color('none')
    for line in ax.yaxis.get_ticklines():
        line.set_markersize(5)
        line.set_color("#585958")
        line.set_markeredgewidth(0.5)
    for line in ax.xaxis.get_ticklines():
        line.set_markersize(5)
        line.set_markeredgewidth(0.5)
        line.set_color("#585958")
    ax.set_xbound(0,10)
    ax.set_ybound(0,10)
    fig.set_size_inches(figsize)
    return fig,ax
def setPltLinewidth(linewidth:float):
    mpl.rcParams['axes.linewidth'] = linewidth
import matplotlib as mpl
fig,ax=createFig()
fig.set_size_inches(5,5)
mpl.rcParams['pdf.fonttype'] = 42
mpl.rcParams['ps.fonttype'] = 42
setPltLinewidth(1)
plt.rcParams['figure.dpi'] = 300
plt.rcParams['savefig.dpi'] = 300
plt.rcParams['font.size'] = 14
plt.rcParams['axes.linewidth'] = 1

../_images/notebooks_Darlin_11_0.png
[805]:
exclude_prefixes = ['MT', 'RP', 'HSP', 'DNAJ']
exclude_condition = ~adata_orig.var_names.str.lower().str.startswith(tuple([prefix.lower() for prefix in exclude_prefixes]))

# 应用过滤条件
adata_orig = adata_orig[:, exclude_condition]
adata_orig
[805]:
View of AnnData object with n_obs × n_vars = 10802 × 21114
    obs: 'library', 'batch', 'n_genes', 'n_counts', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'doublet_score', 'predicted_doublet', 'cell_type', 'state_info', 'time_info', 'tissue', 'state_info_old'
    var: 'n_cells', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable'
    uns: 'cell_type_colors', 'clonal_time_points', 'clone_id', 'data_des', 'library_colors', 'neighbors', 'scrublet', 'time_ordering', 'tissue_colors', 'umap'
    obsm: 'X_clone', 'X_emb', 'X_pca', 'X_umap'
    obsp: 'connectivities', 'distances'
[806]:
adata_orig=adata_orig[adata_orig.obs['tissue'].isin(['Skull'])]
adata_orig=adata_orig[adata_orig.obs['state_info']!='Dc']
adata_orig.uns['data_des']=['Skull']

adata_orig.obs['cell_type']=adata_orig.obs['cell_type'].astype(str)
adata_orig.obs['cell_type']=pd.Categorical(adata_orig.obs['cell_type']).set_categories(['HSC', 'MkP','Ery', 'Baso', 'Neu', 'Mon', 'LMPP'], ordered=True)
adata_orig.uns['cell_type_colors']=['#d62728','#ff7f0e', '#1f77b4', '#279e68',  '#aa40fc', '#8c564b', '#e377c2'] #,'#d9d9d9']
adata_orig.obs['state_info']=adata_orig.obs['cell_type'].astype(str)
/home/zhengtuo/miniconda3/envs/jst/lib/python3.9/site-packages/anndata/compat/_overloaded_dict.py:106: ImplicitModificationWarning: Trying to modify attribute `._uns` of view, initializing view as actual.
  self.data[key] = value
[807]:
time_map={'LMPP':'t0', 'HSC':'t0', 'NA':'t1','Baso':'t1', 'MkP':'t1', 'Mon':'t1',  'Ery':'t1','Neu':'t1', 'Dc':'t1'}
adata_orig.obs['time_info']=adata_orig.obs['cell_type'].map(time_map)
cs.pp.initialize_adata_object(adata_orig)
Clones without any cells are removed.
Time points with clonal info: ['t0' 't1']
WARNING: Default ascending order of time points are: ['nan' 't0' 't1']. If not correct, run cs.hf.update_time_ordering for correction.
WARNING: Please make sure that the count matrix adata.X is NOT log-transformed.
[807]:
AnnData object with n_obs × n_vars = 6094 × 21114
    obs: 'library', 'batch', 'n_genes', 'n_counts', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'doublet_score', 'predicted_doublet', 'cell_type', 'state_info', 'time_info', 'tissue', 'state_info_old'
    var: 'n_cells', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable'
    uns: 'cell_type_colors', 'clonal_time_points', 'clone_id', 'data_des', 'library_colors', 'neighbors', 'scrublet', 'time_ordering', 'tissue_colors', 'umap'
    obsm: 'X_clone', 'X_emb', 'X_pca', 'X_umap'
    obsp: 'connectivities', 'distances'
[808]:
cs.pp.refine_state_info_by_leiden_clustering(adata_orig,selected_key='state_info',selected_values='HSC')

/home/zhengtuo/miniconda3/envs/jst/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(
../_images/notebooks_Darlin_15_1.png
[809]:
df_clone=cs.tl.clone_statistics(adata_orig,joint_variable='state_info');

                     clone_N  clone_fraction
state_info
Baso                      18        0.017408
Baso,Ery                   4        0.003868
Baso,Ery,Neu               2        0.001934
Baso,HSC                   1        0.000967
Baso,HSC,Neu               1        0.000967
...                      ...             ...
nan,MkP,Ery,Neu,Mon        3        0.002901
nan,Mon                    6        0.005803
nan,Mon,Neu                3        0.002901
nan,Neu                    5        0.004836
nan,Neu,Mon,HSC            1        0.000967

[136 rows x 2 columns]
-----------
            cell_N  clonal_cell_fraction
state_info
Baso            97              0.030667
Ery            459              0.145115
HSC            552              0.174518
LMPP           197              0.062283
MkP            215              0.067973
Mon            573              0.181157
Neu            732              0.231426
nan            338              0.106861
[810]:
df_clone
[810]:
clone_id clone_size state_info state_info_N
0 0 2 Ery,Neu 2
1 1 2 nan,HSC 2
2 2 2 MkP,nan 2
3 3 5 Ery,Mon,Neu 3
4 4 1 nan 1
... ... ... ... ...
1029 1029 1 MkP 1
1030 1030 2 Ery,Neu 2
1031 1031 2 Ery 1
1032 1032 1 LMPP 1
1033 1033 1 nan 1

1034 rows × 4 columns

[811]:
sc.pl.embedding(adata_orig,color='time_info',basis='X_umap',frameon=False,title='',
               save='_time_info')
WARNING: saving figure to file figures/X_umap_time_info.pdf
/home/zhengtuo/miniconda3/envs/jst/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(
../_images/notebooks_Darlin_18_2.png
[812]:
sc.pl.embedding(adata_orig,color='cell_type',basis='X_umap',frameon=False,title='',legend_loc='on data',
           legend_fontsize=12, legend_fontoutline=1.5,s=40,
               save='_skull_celltypes')
WARNING: saving figure to file figures/X_umap_skull_celltypes.pdf
/home/zhengtuo/miniconda3/envs/jst/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(
../_images/notebooks_Darlin_19_2.png
[813]:
import numpy as np
import scipy.sparse as ssp

# 假设 df_clone 和 adata_orig 已经定义好了
ref_fates = ['MkP', 'Mon', 'Neu', 'Ery', 'Baso']
celltype_colors = dict(zip(adata_orig.obs['cell_type'].cat.categories, adata_orig.uns['cell_type_colors']))

# 设置figure参数
cs.settings.set_figure_params(pointsize=20)

# 处理df_clone的state_info
df_clone['state_info'] = df_clone['state_info'].str.split(',').apply(lambda x: sorted(x)).apply(lambda x: ','.join(x))

# 遍历每个fate
for fate_tmp in ref_fates:
    all_fates_tmp = ref_fates.copy()
    all_fates_tmp.remove(fate_tmp)

    # 筛选符合条件的df_clone子集
    df_clone_sub = df_clone[df_clone['state_info'].apply(lambda x: (fate_tmp in x) & (('HSC' in x)  or ('LMPP' in x)) & (np.sum([__ in x for __ in all_fates_tmp]) == 0))]

    # 计算MkP_bias
    MkP_bias = (adata_orig.obsm['X_clone'][:, df_clone_sub['clone_id'].to_numpy()].sum(1).A.flatten() > 0).astype(int)
    if(fate_tmp=='MkP'):
        print(MkP_bias)
    # 创建一个新的列在obs中以记录MkP_bias
    adata_orig.obs[f'{fate_tmp}_biased'] = MkP_bias

    # 设置cell_id_t1和sp_idx
    cell_id_t1 = np.ones(len(MkP_bias)).astype(bool)
    sp_idx = np.arange(np.sum(cell_id_t1))

    # 生成fate map embedding图
    cs.pl.fate_map_embedding(adata_orig, MkP_bias, cell_id_t1=cell_id_t1, sp_idx=sp_idx, figure_title=f'{fate_tmp} biased cells')

    # 设置颜色和临时adata
    color_tmp = celltype_colors[fate_tmp]
    adata_tmp = adata_orig.copy()
    adata_tmp.uns['data_des'] = ['all_bias_clones']
    adata_tmp.obsm['X_clone'] = ssp.csr_matrix(np.array([(MkP_bias > 0).astype(int)])).T

    # 绘制克隆在流形上的图
    ax = cs.pl.clones_on_manifold(adata_tmp, selected_clone_list=[0], color_list=[color_tmp, color_tmp, color_tmp], clone_markersize=20, selected_times='t0')
    ax.set_title(f'{fate_tmp} biased HSPCs')

# 最终结果中可以通过 adata_orig.obs 查看每个fate的biased cells信息

[0 0 0 ... 0 0 0]
../_images/notebooks_Darlin_20_1.png
../_images/notebooks_Darlin_20_2.png
../_images/notebooks_Darlin_20_3.png
../_images/notebooks_Darlin_20_4.png
../_images/notebooks_Darlin_20_5.png
../_images/notebooks_Darlin_20_6.png
../_images/notebooks_Darlin_20_7.png
../_images/notebooks_Darlin_20_8.png
../_images/notebooks_Darlin_20_9.png
../_images/notebooks_Darlin_20_10.png
[814]:
# 假设 ref_fates 和 adata_orig 已经定义好
ref_fates = ['MkP', 'Mon', 'Neu', 'Ery', 'Baso']

# 创建一个新的列来存储fate信息
adata_orig.obs['fate'] = 2  # 默认值为2

# 设置每个fate的biased信息
for fate_tmp in ref_fates:
    all_fates_tmp = ref_fates.copy()
    all_fates_tmp.remove(fate_tmp)

    df_clone_sub = df_clone[df_clone['state_info'].apply(lambda x: (fate_tmp in x) & (('HSC' in x)  or ('LMPP' in x)) & (np.sum([__ in x for __ in all_fates_tmp]) == 0))]

    MkP_bias = (adata_orig.obsm['X_clone'][:, df_clone_sub['clone_id'].to_numpy()].sum(1).A.flatten() > 0).astype(int)

    adata_orig.obs[f'{fate_tmp}_biased'] = MkP_bias

# 更新fate列的值
adata_orig.obs.loc[adata_orig.obs['MkP_biased'] == 1, 'fate'] = 1

# 如果有一个为1且不是MkP
adata_orig.obs.loc[(adata_orig.obs[['Mon_biased', 'Neu_biased', 'Ery_biased']].sum(axis=1) > 0) & (adata_orig.obs['MkP_biased'] != 1), 'fate'] = 0

# 其余的保持为2
adata_orig.obs.loc[(adata_orig.obs[['MkP_biased', 'Mon_biased', 'Neu_biased', 'Ery_biased']].sum(axis=1) == 0), 'fate'] = 2

# 删除临时的biased列
for fate_tmp in ref_fates:
    del adata_orig.obs[f'{fate_tmp}_biased']

[815]:
adata_orig.obs['fate']=adata_orig.obs['fate'].astype(str)
[816]:
adata_orig.uns['cell_type_colors']
[816]:
['#d62728', '#ff7f0e', '#1f77b4', '#279e68', '#aa40fc', '#8c564b', '#e377c2']
[817]:
import matplotlib.pyplot as plt
import seaborn as sns

# 绘制整个adata_orig的UMAP背景
sc.pl.umap(adata_orig, s=20, show=False)
plt.gca().set_title('UMAP of all cells')

# 筛选出fate为0或1的细胞
adata_fate_0_or_1 = adata_orig[adata_orig.obs['fate'].isin(['0','1'])]

# 绘制fate为0或1的细胞,叠加在整体UMAP上
sc.pl.umap(adata_fate_0_or_1, color='fate', s=50, ax=plt.gca(), show=False)

# 显示图形
plt.show()

/home/zhengtuo/miniconda3/envs/jst/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
  cax = scatter(
/home/zhengtuo/miniconda3/envs/jst/lib/python3.9/site-packages/anndata/compat/_overloaded_dict.py:106: ImplicitModificationWarning: Trying to modify attribute `._uns` of view, initializing view as actual.
  self.data[key] = value
/home/zhengtuo/miniconda3/envs/jst/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(
../_images/notebooks_Darlin_24_1.png
[818]:
adata = cs.tmap.infer_Tmap_from_multitime_clones(
    adata_orig,
    clonal_time_points=["t0"],
    later_time_point="t1",
    smooth_array=[10,10,5],
    sparsity_threshold=0.1,
    intraclone_threshold=0.2,
    max_iter_N=10,
    epsilon_converge=0.01,
)
------Compute the full Similarity matrix if necessary------
------Infer transition map between initial time points and the later time one------
--------Current initial time point: t0--------
Step 1: Select time points
--> Clonal cell fraction (day t0-t1): 0.32155731795241527
--> Clonal cell fraction (day t1-t0): 0.25951219512195123
--> Numer of cells that are clonally related -- day t0: 446  and day t1: 1064
Number of multi-time clones post selection: 143
/home/zhengtuo/miniconda3/envs/jst/lib/python3.9/site-packages/anndata/compat/_overloaded_dict.py:106: ImplicitModificationWarning: Trying to modify attribute `._uns` of view, initializing view as actual.
  self.data[key] = value
Cell number=5487, Clone number=143
--> clonal_cell_id_t1: 446
--> Tmap_cell_id_t1: 1387
Step 2: Optimize the transition map recursively
Load pre-computed similarity matrix
--> Load from hard disk--------
--> Compute similarity matrix: load existing data
--> Time elapsed:  0.00816798210144043
--> Time elapsed:  0.04114341735839844
--> Time elapsed:  0.019692659378051758
--> Time elapsed:  0.06539463996887207
--> Compute similarity matrix: load existing data
--> Time elapsed:  0.00664210319519043
--> Time elapsed:  0.041623592376708984
--> Time elapsed:  0.019869565963745117
--> Time elapsed:  0.06673765182495117
--> Compute similarity matrix: load existing data
--> Time elapsed:  0.004523754119873047
--> Time elapsed:  0.02536463737487793
--> Time elapsed:  0.012595415115356445
--> Time elapsed:  0.04546666145324707
Iteration 1, Use smooth_round=10
--> Clone normalization
--> Relative time point pair index: 0
--> Clone id: 0
--> Start to smooth the refined clonal map
--> Phase I: time elapsed --  0.015916109085083008
--> Phase II: time elapsed --  0.02384638786315918
Iteration 2, Use smooth_round=10
--> Clone normalization
--> Relative time point pair index: 0
--> Clone id: 0
--> Start to smooth the refined clonal map
--> Phase I: time elapsed --  0.012611865997314453
--> Phase II: time elapsed --  0.02019977569580078
Iteration 3, Use smooth_round=5
--> Clone normalization
--> Relative time point pair index: 0
--> Clone id: 0
--> Start to smooth the refined clonal map
--> Phase I: time elapsed --  0.010289430618286133
--> Phase II: time elapsed --  0.01741957664489746
../_images/notebooks_Darlin_25_3.png
../_images/notebooks_Darlin_25_4.png
Iteration 4, Use smooth_round=5
--> Clone normalization
--> Relative time point pair index: 0
--> Clone id: 0
--> Start to smooth the refined clonal map
--> Phase I: time elapsed --  0.0070858001708984375
--> Phase II: time elapsed --  0.014077425003051758
../_images/notebooks_Darlin_25_6.png
--> Final round of Smooth (to expand the state space of Tmap to include non-clonal states)
--> Phase I: time elapsed --  0.05958151817321777
--> Phase II: time elapsed --  0.1544644832611084
--> ----Intraclone transition map----
--> Clone normalization
--> Relative time point pair index: 0
--> Clone id: 0
-----------Total used time: 5.106891393661499 s ------------
[820]:
adata.X[0].toarray()
[820]:
array([[0.       , 0.       , 0.       , ..., 0.       , 0.       ,
        4.8019204]], dtype=float32)
[36]:
cs.settings.set_figure_params(pointsize=20,fontsize=18)
for selected_fates in [['MkP'],['Ery'],['Neu'],['Mon']]:
    cs.tl.fate_map(
        adata,
        selected_fates=selected_fates,
        source="transition_map",
        map_backward=True,
    )
    cs.pl.fate_map(
        adata,
        selected_fates=selected_fates,
        selected_times='t0',
        source="transition_map",
        plot_target_state=False,
        background=False,
        show_histogram=False,
        auto_color_scale=False,
        vmax=0.6,

    )
Results saved at adata.obs['fate_map_transition_map_MkP']
Results saved at adata.obs['fate_map_transition_map_Ery']
Results saved at adata.obs['fate_map_transition_map_Neu']
Results saved at adata.obs['fate_map_transition_map_Mon']
../_images/notebooks_Darlin_27_1.png
../_images/notebooks_Darlin_27_2.png
../_images/notebooks_Darlin_27_3.png
../_images/notebooks_Darlin_27_4.png
[643]:
adata.obs['Fate_bias']=np.nan
# sel_idx_1=((adata.obs['fate_map_transition_map_MkP']>0.25) & (adata.obsm['X_emb'][:,0]<4))
# adata.obs.loc[sel_idx_1,'Fate_bias']='Early MkP bias'
# sel_idx_2=((adata.obs['fate_map_transition_map_MkP']>0.25) & (adata.obsm['X_emb'][:,0]>=4))
# adata.obs.loc[sel_idx_2,'Fate_bias']='Late MkP bias'
sel_idx_1=((adata.obs['fate_map_transition_map_MkP']>0.25))
adata.obs.loc[sel_idx_1,'Fate_bias']='MkP bias'
sel_idx_3=((adata.obs['fate_map_transition_map_MkP']<=0.25)) & (adata.obs['state_info']=='HSC')
adata.obs.loc[sel_idx_3,'Fate_bias']='No MkP bias'
# sel_idx_4=(adata.obs['state_info']=='LMPP')
# adata.obs.loc[sel_idx_4,'Fate_bias']='LMPP'
adata_sub=adata[adata.obs['time_info']=='t0']
[644]:
cs.pl.plot_adata_with_prefered_order(adata,'Fate_bias',plot_order=['LMPP','No MkP bias','MkP bias'],
                              palette={'LMPP':'#d9d9d9','No MkP bias':'#ff7f0e','MkP bias':'#1f77b4','Late MkP bias':'grey'},
                              linewidth = 0,s=30)
[644]:
<AxesSubplot: xlabel='x', ylabel='y'>
../_images/notebooks_Darlin_29_1.png
[ ]:

[780]:
import pandas as pd
import numpy as np

# 假设 ref_fates 和 adata_orig 已经定义好
adata_orig = adata.copy()
ref_fates = ['MkP', 'Mon', 'Neu', 'Ery', 'Baso']

# 创建一个新的列来存储fate信息
adata_orig.obs['GT'] = np.nan  # 默认值为 NaN

# 设置每个fate的biased信息
for fate_tmp in ref_fates:
    all_fates_tmp = ref_fates.copy()
    all_fates_tmp.remove(fate_tmp)

    df_clone_sub = df_clone[df_clone['state_info'].apply(lambda x: (fate_tmp in x) & (('HSC' in x)  or ('LMPP' in x)) & (np.sum([__ in x for __ in all_fates_tmp]) == 0))]

    MkP_bias = (adata_orig.obsm['X_clone'][:, df_clone_sub['clone_id'].to_numpy()].sum(1).A.flatten() > 0).astype(int)

    adata_orig.obs[f'{fate_tmp}_biased'] = MkP_bias

# 更新fate列的值
adata_orig.obs.loc[adata_orig.obs['MkP_biased'] == 1, 'GT'] = '1'

# 如果有一个为1且不是MkP
adata_orig.obs.loc[(adata_orig.obs[['Mon_biased', 'Neu_biased', 'Ery_biased']].sum(axis=1) > 0) & (adata_orig.obs['MkP_biased'] != 1), 'GT'] = '0'

# 其余的保持为 NaN
adata_orig.obs.loc[(adata_orig.obs[['MkP_biased', 'Mon_biased', 'Neu_biased', 'Ery_biased']].sum(axis=1) == 0), 'GT'] = np.nan

# 删除临时的 biased 列
for fate_tmp in ref_fates:
    del adata_orig.obs[f'{fate_tmp}_biased']

# 检查唯一值
print(adata_orig.obs['GT'].unique())
[nan '1' '0']
[180]:
adata_orig.obs['GT'].unique()
[180]:
array([nan, '1', '0'], dtype=object)
[158]:
#sc.pl.umap(adata_orig, color='GT', s=50, ax=plt.gca(), show=False)
adata_orig
[158]:
AnnData object with n_obs × n_vars = 5487 × 21114
    obs: 'library', 'batch', 'n_genes', 'n_counts', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'doublet_score', 'predicted_doublet', 'cell_type', 'state_info', 'time_info', 'tissue', 'state_info_old', 'fate', 'fate_map_transition_map_MkP', 'fate_map_transition_map_Ery', 'fate_map_transition_map_Neu', 'fate_map_transition_map_Mon', 'Fate_bias', 'GT'
    var: 'n_cells', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable'
    uns: 'cell_type_colors', 'clonal_time_points', 'clone_id', 'data_des', 'library_colors', 'neighbors', 'scrublet', 'time_ordering', 'tissue_colors', 'umap', 'time_info_colors', 'Tmap_cell_id_t1', 'Tmap_cell_id_t2', 'clonal_cell_id_t1', 'clonal_cell_id_t2', 'sp_idx', 'transition_map', 'intraclone_transition_map', 'available_map', 'fate_map_params', 'fate_potency_tmp'
    obsm: 'X_clone', 'X_emb', 'X_pca', 'X_umap'
    obsp: 'connectivities', 'distances'
[172]:
adata_orig.obs['index']=range(len(adata_orig))

[182]:
# adata_fate_0_or_1.obs['GT']
GT_colors= {
    '1': '#1f77b4',   #
    '0': '#2ca02c'  # 绿色橙色
}
adata_orig.obs['GT'].unique()
sc.pl.umap(adata_orig, color='GT', s=50, ax=plt.gca(), show=False)

/home/zhengtuo/miniconda3/envs/jst/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(
[182]:
<AxesSubplot: title={'center': 'GT'}, xlabel='UMAP1', ylabel='UMAP2'>
../_images/notebooks_Darlin_35_2.png
[183]:
# import matplotlib.pyplot as plt
# import seaborn as sns
# GT_colors= {
#     'HSC': '#ff7f0e',   # 蓝色
#     'mkp_bias': '#1f77b4',   #
#     'no mkp_bias': '#2ca02c'  # 绿色橙色
# }
# # 绘制整个adata_orig的UMAP背景
# sc.pl.umap(adata_orig, s=20, show=False)
# plt.gca().set_title('UMAP of all cells')

# # 筛选出fate为0或1的细胞
# adata_fate_0_or_1 = adata_orig[adata_orig.obs['GT'].isin(['0','1'])]
# adata_fate_0_or_1=adata_fate_0_or_1[adata_fate_0_or_1.obs['state_info']=='HSC']
# # 绘制fate为0或1的细胞,叠加在整体UMAP上

# dic1={'0':'no mkp_bias','1':'mkp_bias'}
# #dic1={0:'multilineage',1:'myelo-erythroid restricted'}

# #adata_fate_0_or_1.obs['Fate_bias']=None
# #adata_fate_0_or_1.obs['Fate_bias']=adata_fate_0_or_1.obs['GT'].map(dic1)
# #adata_fate_0_or_1.obs['GT']=adata_fate_0_or_1.obs['Fate_bias']
# sc.pl.umap(adata_fate_0_or_1, color='GT', s=50, ax=plt.gca(), show=False,  palette=GT_colors)
# ax.figure.savefig('/home/zhengtuo/songtao/DestinyNet/rebuttal6.1/darlin_GT.pdf', bbox_inches='tight')
# # 显示图形
# plt.show()

[174]:
# import matplotlib.pyplot as plt
# import seaborn as sns

# # 绘制整个adata_orig的UMAP背景
# sc.pl.umap(adata_orig, s=20, color="GT")
# plt.gca().set_title('UMAP of all cells')

# # 筛选出fate为0或1的细胞
# adata_fate_0_or_1 = adata_orig[adata_orig.obs['GT'].isin(['0','1'])]
# adata_fate_0_or_1=adata_fate_0_or_1[adata_fate_0_or_1.obs['state_info']=='HSC']
# # 绘制fate为0或1的细胞,叠加在整体UMAP上

# dic1={'0':'no mkp_bias','1':'mkp_bias',2:None}
# #dic1={0:'multilineage',1:'myelo-erythroid restricted'}

# adata_fate_0_or_1.obs['Fate_bias']=None
# adata_fate_0_or_1.obs['Fate_bias']=adata_fate_0_or_1.obs['GT'].map(dic1)
# adata_fate_0_or_1.obs['GT']=adata_fate_0_or_1.obs['Fate_bias']
# sc.pl.umap(adata_fate_0_or_1, color='GT', s=50, ax=plt.gca(), show=False,  palette=GT_colors)
# ax.figure.savefig('/home/zhengtuo/songtao/DestinyNet/rebuttal6.1/darlin_GT.pdf', bbox_inches='tight')
# # 显示图形
# plt.show()

[219]:
adata_orig.obs
[219]:
library batch n_genes n_counts n_genes_by_counts total_counts total_counts_mt pct_counts_mt doublet_score predicted_doublet ... tissue state_info_old fate fate_map_transition_map_MkP fate_map_transition_map_Ery fate_map_transition_map_Neu fate_map_transition_map_Mon Fate_bias GT index
LL837_skull_AAACCCAAGAGTCACG LL837_skull 1 2182 10000.000000 2182 8330.0 566.0 6.794718 0.043403 0 ... Skull Mon 2 NaN NaN NaN NaN NaN NaN 0
LL837_skull_AAACCCAAGTGAACAT LL837_skull 1 2247 10000.000000 2247 10328.0 128.0 1.239349 0.048237 0 ... Skull Ery 2 NaN NaN NaN NaN NaN NaN 1
LL837_skull_AAACCCACATCCGTTC LL837_skull 1 4151 9999.999023 4150 29156.0 420.0 1.440527 0.012388 0 ... Skull Mon 2 NaN NaN NaN NaN NaN NaN 2
LL837_skull_AAACCCAGTCCACATA LL837_skull 1 1515 10000.000000 1515 6034.0 146.0 2.419622 0.146524 0 ... Skull Meg 2 NaN NaN NaN NaN NaN NaN 3
LL837_skull_AAACCCAGTGAACGGT LL837_skull 1 1782 10000.000000 1782 6942.0 108.0 1.555748 0.022658 0 ... Skull Meg 2 NaN NaN NaN NaN NaN NaN 4
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
LL837_skull_TTTGGTTTCATGCCCT LL837_skull 1 4073 9999.999023 4073 29120.0 474.0 1.627747 0.019994 0 ... Skull Ery 2 NaN NaN NaN NaN NaN NaN 5482
LL837_skull_TTTGGTTTCGACGTCG LL837_skull 1 3697 10000.000977 3697 26760.0 632.0 2.361734 0.016280 0 ... Skull Neu 2 NaN NaN NaN NaN NaN NaN 5483
LL837_skull_TTTGTTGAGCGTTACT LL837_skull 1 1476 10000.000000 1476 6594.0 286.0 4.337276 0.079240 0 ... Skull LMPP 2 0.074549 0.185234 0.142106 0.400063 NaN NaN 5484
LL837_skull_TTTGTTGAGTGATCGG LL837_skull 1 3283 9999.999023 3283 21940.0 274.0 1.248861 0.023810 0 ... Skull Neu 2 NaN NaN NaN NaN NaN NaN 5485
LL837_skull_TTTGTTGGTTACAGCT LL837_skull 1 817 10000.000977 817 2316.0 192.0 8.290155 0.031239 0 ... Skull LMPP 2 0.163187 0.226108 0.252171 0.273444 NaN NaN 5486

5487 rows × 23 columns

[175]:
sc.pl.umap(adata_orig, color='cell_type', s=10)

/home/zhengtuo/miniconda3/envs/jst/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(
../_images/notebooks_Darlin_39_1.png
[1100]:
adata_orig.obs['time_info']
[1100]:
LL837_skull_AAACCCAAGAGTCACG    t1
LL837_skull_AAACCCAAGTGAACAT    t1
LL837_skull_AAACCCACATCCGTTC    t1
LL837_skull_AAACCCAGTCCACATA    t1
LL837_skull_AAACCCAGTGAACGGT    t1
                                ..
LL837_skull_TTTGGTTTCATGCCCT    t1
LL837_skull_TTTGGTTTCGACGTCG    t1
LL837_skull_TTTGTTGAGCGTTACT    t0
LL837_skull_TTTGTTGAGTGATCGG    t1
LL837_skull_TTTGTTGGTTACAGCT    t0
Name: time_info, Length: 5487, dtype: category
Categories (2, object): ['t0', 't1']
[220]:
# 映射 'GT' 列
#adata_orig.obs['GT'] = adata_orig.obs['GT'].map(dic1)

# 将不等于 'HSC' 的 'cell_type' 对应的 'GT' 列设为 None
adata_orig.obs.loc[adata_orig.obs['cell_type'] != 'HSC', 'GT'] = None

# 将不等于 't0' 的 'time_info' 对应的 'GT' 列设为 None
adata_orig.obs.loc[adata_orig.obs['time_info'] != 't0', 'GT'] = None

# 绘制 UMAP
fig, ax = plt.subplots()
fig.set_size_inches(5, 5)
sc.pl.umap(adata_orig, color='GT', s=100, ax=ax,palette=GT_colors)

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
Cell In [220], line 13
     11 fig, ax = plt.subplots()
     12 fig.set_size_inches(5, 5)
---> 13 sc.pl.umap(adata_orig, color='GT', s=100, ax=ax,palette=GT_colors)

File ~/miniconda3/envs/jst/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:669, in umap(adata, **kwargs)
    610 @_wraps_plot_scatter
    611 @_doc_params(
    612     adata_color_etc=doc_adata_color_etc,
   (...)
    616 )
    617 def umap(adata, **kwargs) -> Union[Axes, List[Axes], None]:
    618     """\
    619     Scatter plot in UMAP basis.
    620
   (...)
    667     tl.umap
    668     """
--> 669     return embedding(adata, 'umap', **kwargs)

File ~/miniconda3/envs/jst/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:265, in embedding(adata, basis, color, gene_symbols, use_raw, sort_order, edges, edges_width, edges_color, neighbors_key, arrows, arrows_kwds, groups, components, dimensions, layer, projection, scale_factor, color_map, cmap, palette, na_color, na_in_legend, size, frameon, legend_fontsize, legend_fontweight, legend_loc, legend_fontoutline, colorbar_loc, vmax, vmin, vcenter, norm, add_outline, outline_width, outline_color, ncols, hspace, wspace, title, show, save, ax, return_fig, **kwargs)
    256 for count, (value_to_plot, dims) in enumerate(zip(color, dimensions)):
    257     color_source_vector = _get_color_source_vector(
    258         adata,
    259         value_to_plot,
   (...)
    263         groups=groups,
    264     )
--> 265     color_vector, categorical = _color_vector(
    266         adata,
    267         value_to_plot,
    268         color_source_vector,
    269         palette=palette,
    270         na_color=na_color,
    271     )
    273     # Order points
    274     order = slice(None)

File ~/miniconda3/envs/jst/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1213, in _color_vector(adata, values_key, values, palette, na_color)
   1209     return values, False
   1210 else:  # is_categorical_dtype(values)
   1211     color_map = {
   1212         k: to_hex(v)
-> 1213         for k, v in _get_palette(adata, values_key, palette=palette).items()
   1214     }
   1215     # If color_map does not have unique values, this can be slow as the
   1216     # result is not categorical
   1217     color_vector = pd.Categorical(values.map(color_map))

File ~/miniconda3/envs/jst/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1179, in _get_palette(adata, values_key, palette)
   1177 values = pd.Categorical(adata.obs[values_key])
   1178 if palette:
-> 1179     _utils._set_colors_for_categorical_obs(adata, values_key, palette)
   1180 elif color_key not in adata.uns or len(adata.uns[color_key]) < len(
   1181     values.categories
   1182 ):
   1183     #  set a default palette in case that no colors or few colors are found
   1184     _utils._set_default_colors_for_categorical_obs(adata, values_key)

File ~/miniconda3/envs/jst/lib/python3.9/site-packages/scanpy/plotting/_utils.py:390, in _set_colors_for_categorical_obs(adata, value_to_plot, palette)
    388     colors_list = [to_hex(x) for x in cmap(np.linspace(0, 1, len(categories)))]
    389 elif isinstance(palette, cabc.Mapping):
--> 390     colors_list = [to_hex(palette[k], keep_alpha=True) for k in categories]
    391 else:
    392     # check if palette is a list and convert it to a cycler, thus
    393     # it doesnt matter if the list is shorter than the categories length:
    394     if isinstance(palette, cabc.Sequence):

File ~/miniconda3/envs/jst/lib/python3.9/site-packages/scanpy/plotting/_utils.py:390, in <listcomp>(.0)
    388     colors_list = [to_hex(x) for x in cmap(np.linspace(0, 1, len(categories)))]
    389 elif isinstance(palette, cabc.Mapping):
--> 390     colors_list = [to_hex(palette[k], keep_alpha=True) for k in categories]
    391 else:
    392     # check if palette is a list and convert it to a cycler, thus
    393     # it doesnt matter if the list is shorter than the categories length:
    394     if isinstance(palette, cabc.Sequence):

KeyError: 0
../_images/notebooks_Darlin_41_1.png
[1124]:
ax.figure.savefig('/home/zhengtuo/songtao/DestinyNet/rebuttal6.1/darlin_GT.pdf', bbox_inches='tight')

[186]:
import sklearn
sklearn.metrics.silhouette_score(
    adata_orig[adata_orig.obs['GT'].isin(['no mkp_bias', 'mkp_bias'])].obsm["X_umap"],
    adata_orig[adata_orig.obs['GT'].isin(['no mkp_bias', 'mkp_bias'])].obs['GT']
)
[1073]:
adata_orig
[1073]:
AnnData object with n_obs × n_vars = 5487 × 21114
    obs: 'library', 'batch', 'n_genes', 'n_counts', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'doublet_score', 'predicted_doublet', 'cell_type', 'state_info', 'time_info', 'tissue', 'state_info_old', 'fate', 'fate_map_transition_map_MkP', 'fate_map_transition_map_Ery', 'fate_map_transition_map_Neu', 'fate_map_transition_map_Mon', 'Fate_bias', 'GT', 'index'
    var: 'n_cells', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable'
    uns: 'cell_type_colors', 'clonal_time_points', 'clone_id', 'data_des', 'library_colors', 'neighbors', 'scrublet', 'time_ordering', 'tissue_colors', 'umap', 'Tmap_cell_id_t1', 'Tmap_cell_id_t2', 'clonal_cell_id_t1', 'clonal_cell_id_t2', 'sp_idx', 'transition_map', 'intraclone_transition_map', 'available_map', 'fate_map_params', 'fate_potency_tmp'
    obsm: 'X_clone', 'X_emb', 'X_pca', 'X_umap'
    obsp: 'connectivities', 'distances'
[221]:
fig, ax = plt.subplots()
fig.set_size_inches(5, 5)

# 绘制UMAP图
sc.pl.umap(adata_orig, color='cell_type', s=100, show=False, ax=ax)
#ax.figure.savefig('/home/zhengtuo/songtao/DestinyNet/rebuttal6.1/darlin_fate_prd.pdf', bbox_inches='tight')


/home/zhengtuo/miniconda3/envs/jst/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(
[221]:
<AxesSubplot: title={'center': 'cell_type'}, xlabel='UMAP1', ylabel='UMAP2'>
../_images/notebooks_Darlin_45_2.png
[1128]:
adata_orig.obs['cell_type']
[1128]:
LL837_skull_AAACCCAAGAGTCACG     Mon
LL837_skull_AAACCCAAGTGAACAT     Ery
LL837_skull_AAACCCACATCCGTTC     Mon
LL837_skull_AAACCCAGTCCACATA     MkP
LL837_skull_AAACCCAGTGAACGGT     MkP
                                ...
LL837_skull_TTTGGTTTCATGCCCT     Ery
LL837_skull_TTTGGTTTCGACGTCG     Neu
LL837_skull_TTTGTTGAGCGTTACT    LMPP
LL837_skull_TTTGTTGAGTGATCGG     Neu
LL837_skull_TTTGTTGGTTACAGCT    LMPP
Name: cell_type, Length: 5487, dtype: category
Categories (7, object): ['HSC' < 'MkP' < 'Ery' < 'Baso' < 'Neu' < 'Mon' < 'LMPP']
[222]:
fig, ax = plt.subplots()
fig.set_size_inches(5, 5)

# 绘制UMAP图
sc.pl.umap(adata3, color='GT', s=100, show=False, ax=ax, palette=GT_colors)
#ax.figure.savefig('/home/zhengtuo/songtao/DestinyNet/rebuttal6.1/darlin_fate_prd.pdf', bbox_inches='tight')


---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
Cell In [222], line 5
      2 fig.set_size_inches(5, 5)
      4 # 绘制UMAP图
----> 5 sc.pl.umap(adata3, color='GT', s=100, show=False, ax=ax, palette=GT_colors)

File ~/miniconda3/envs/jst/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:669, in umap(adata, **kwargs)
    610 @_wraps_plot_scatter
    611 @_doc_params(
    612     adata_color_etc=doc_adata_color_etc,
   (...)
    616 )
    617 def umap(adata, **kwargs) -> Union[Axes, List[Axes], None]:
    618     """\
    619     Scatter plot in UMAP basis.
    620
   (...)
    667     tl.umap
    668     """
--> 669     return embedding(adata, 'umap', **kwargs)

File ~/miniconda3/envs/jst/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:265, in embedding(adata, basis, color, gene_symbols, use_raw, sort_order, edges, edges_width, edges_color, neighbors_key, arrows, arrows_kwds, groups, components, dimensions, layer, projection, scale_factor, color_map, cmap, palette, na_color, na_in_legend, size, frameon, legend_fontsize, legend_fontweight, legend_loc, legend_fontoutline, colorbar_loc, vmax, vmin, vcenter, norm, add_outline, outline_width, outline_color, ncols, hspace, wspace, title, show, save, ax, return_fig, **kwargs)
    256 for count, (value_to_plot, dims) in enumerate(zip(color, dimensions)):
    257     color_source_vector = _get_color_source_vector(
    258         adata,
    259         value_to_plot,
   (...)
    263         groups=groups,
    264     )
--> 265     color_vector, categorical = _color_vector(
    266         adata,
    267         value_to_plot,
    268         color_source_vector,
    269         palette=palette,
    270         na_color=na_color,
    271     )
    273     # Order points
    274     order = slice(None)

File ~/miniconda3/envs/jst/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1213, in _color_vector(adata, values_key, values, palette, na_color)
   1209     return values, False
   1210 else:  # is_categorical_dtype(values)
   1211     color_map = {
   1212         k: to_hex(v)
-> 1213         for k, v in _get_palette(adata, values_key, palette=palette).items()
   1214     }
   1215     # If color_map does not have unique values, this can be slow as the
   1216     # result is not categorical
   1217     color_vector = pd.Categorical(values.map(color_map))

File ~/miniconda3/envs/jst/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1179, in _get_palette(adata, values_key, palette)
   1177 values = pd.Categorical(adata.obs[values_key])
   1178 if palette:
-> 1179     _utils._set_colors_for_categorical_obs(adata, values_key, palette)
   1180 elif color_key not in adata.uns or len(adata.uns[color_key]) < len(
   1181     values.categories
   1182 ):
   1183     #  set a default palette in case that no colors or few colors are found
   1184     _utils._set_default_colors_for_categorical_obs(adata, values_key)

File ~/miniconda3/envs/jst/lib/python3.9/site-packages/scanpy/plotting/_utils.py:390, in _set_colors_for_categorical_obs(adata, value_to_plot, palette)
    388     colors_list = [to_hex(x) for x in cmap(np.linspace(0, 1, len(categories)))]
    389 elif isinstance(palette, cabc.Mapping):
--> 390     colors_list = [to_hex(palette[k], keep_alpha=True) for k in categories]
    391 else:
    392     # check if palette is a list and convert it to a cycler, thus
    393     # it doesnt matter if the list is shorter than the categories length:
    394     if isinstance(palette, cabc.Sequence):

File ~/miniconda3/envs/jst/lib/python3.9/site-packages/scanpy/plotting/_utils.py:390, in <listcomp>(.0)
    388     colors_list = [to_hex(x) for x in cmap(np.linspace(0, 1, len(categories)))]
    389 elif isinstance(palette, cabc.Mapping):
--> 390     colors_list = [to_hex(palette[k], keep_alpha=True) for k in categories]
    391 else:
    392     # check if palette is a list and convert it to a cycler, thus
    393     # it doesnt matter if the list is shorter than the categories length:
    394     if isinstance(palette, cabc.Sequence):

KeyError: 'mkp_bias'
../_images/notebooks_Darlin_47_1.png
[187]:
fig,ax=createFig()
fig.set_size_inches(5,5)

sc.pl.umap(adata_fate_0_or_1, color='Fate_bias', s=100,ax=ax,)
ax.figure.savefig('/home/zhengtuo/songtao/DestinyNet/rebuttal6.1/darlin_cospar.pdf', bbox_inches='tight')

/home/zhengtuo/miniconda3/envs/jst/lib/python3.9/site-packages/anndata/compat/_overloaded_dict.py:106: ImplicitModificationWarning: Trying to modify attribute `._uns` of view, initializing view as actual.
  self.data[key] = value
/home/zhengtuo/miniconda3/envs/jst/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(
../_images/notebooks_Darlin_48_1.png
[196]:
adata_orig[(adata_orig.obs['GT'].isin(['0','1']))].obs

[196]:
library batch n_genes n_counts n_genes_by_counts total_counts total_counts_mt pct_counts_mt doublet_score predicted_doublet ... state_info_old fate fate_map_transition_map_MkP fate_map_transition_map_Ery fate_map_transition_map_Neu fate_map_transition_map_Mon Fate_bias GT Fate_binary clone
13 LL837_skull 1 2411 10000.000000 2411 10746.0 218.0 2.028662 0.053785 0 ... HSC 1 0.219269 0.158679 0.367359 0.216678 No MkP bias 1 0 988
90 LL837_skull 1 2774 10000.000000 2774 12630.0 222.0 1.757720 0.063830 0 ... HSC 1 0.488377 0.248923 0.097766 0.118884 MkP bias 1 1 805
112 LL837_skull 1 2341 10000.000000 2341 9946.0 192.0 1.930424 0.046968 0 ... HSC 0 0.128018 0.192091 0.386379 0.256319 No MkP bias 0 0 635
138 LL837_skull 1 2176 9999.999023 2176 9826.0 188.0 1.913291 0.052323 0 ... HSC 0 0.539199 0.286691 0.107701 0.036490 MkP bias 0 1 8
264 LL837_skull 1 2155 10000.000000 2155 8708.0 198.0 2.273771 0.038168 0 ... HSC 1 0.360206 0.175236 0.265452 0.174175 MkP bias 1 1 444
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
5256 LL837_skull 1 1697 10000.000000 1697 7322.0 124.0 1.693526 0.081879 0 ... HSC 0 0.348092 0.132318 0.414731 0.042925 MkP bias 0 1 251
5279 LL837_skull 1 1293 10000.000977 1293 4532.0 130.0 2.868491 0.090645 0 ... HSC 1 0.395193 0.151719 0.306524 0.107999 MkP bias 1 1 777
5357 LL837_skull 1 1799 10000.000000 1799 6732.0 130.0 1.931075 0.028299 0 ... HSC 0 0.426256 0.188785 0.268747 0.079448 MkP bias 0 1 170
5398 LL837_skull 1 1944 10000.000000 1944 7712.0 140.0 1.815353 0.053785 0 ... HSC 0 0.159331 0.248843 0.387131 0.148377 No MkP bias 0 0 272
5418 LL837_skull 1 2737 10000.000000 2737 11318.0 218.0 1.926135 0.039152 0 ... HSC 0 0.326030 0.129297 0.297700 0.219458 MkP bias 0 1 8

68 rows × 24 columns

[723]:
adata_orig[(adata_orig.obs['GT'] != '2') & (~adata_orig.obs['Fate_bias'].isin(['MkP bias', 'No MkP bias']))].obs

[723]:
library batch n_genes n_counts n_genes_by_counts total_counts total_counts_mt pct_counts_mt doublet_score predicted_doublet ... time_info tissue state_info_old fate fate_map_transition_map_MkP fate_map_transition_map_Ery fate_map_transition_map_Neu fate_map_transition_map_Mon Fate_bias GT
LL837_skull_AAAGGATCACGTTGGC LL837_skull 1 4425 10000.000000 4425 42134.0 766.0 1.818009 0.011020 0 ... t1 Skull Ery 0 NaN NaN NaN NaN NaN 0
LL837_skull_AACCTGACATGGATCT LL837_skull 1 3366 10000.000000 3366 18584.0 396.0 2.130865 0.060215 0 ... t1 Skull Mon 0 NaN NaN NaN NaN NaN 0
LL837_skull_AAGTGAATCAGTGTCA LL837_skull 1 3013 10000.000977 3013 16668.0 542.0 3.251740 0.061986 0 ... t1 Skull Meg 1 NaN NaN NaN NaN NaN 1
LL837_skull_ACAAAGAAGAGGCGTT LL837_skull 1 3105 9999.999023 3105 16682.0 290.0 1.738401 0.035385 0 ... t1 Skull Meg 1 NaN NaN NaN NaN NaN 1
LL837_skull_ACACGCGTCAACCTTT LL837_skull 1 3517 9999.999023 3517 21620.0 308.0 1.424607 0.040166 0 ... t0 Skull LMPP 0 0.035840 0.210532 0.120582 0.455573 NaN 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
LL837_skull_TTCTTCCTCATTTCCA LL837_skull 1 4472 9999.999023 4471 26352.0 434.0 1.646934 0.018529 0 ... t0 Skull LMPP 1 0.198727 0.231416 0.222007 0.262037 NaN 1
LL837_skull_TTGCTGCAGTGCCAGA LL837_skull 1 4291 9999.999023 4290 23722.0 282.0 1.188770 0.019495 0 ... t1 Skull Meg 1 NaN NaN NaN NaN NaN 1
LL837_skull_TTGGATGAGATGACAT LL837_skull 1 4844 10000.000000 4844 48688.0 816.0 1.675978 0.005157 0 ... t1 Skull Ery 0 NaN NaN NaN NaN NaN 0
LL837_skull_TTTACGTAGAAGGATG LL837_skull 1 5299 10000.000000 5298 38142.0 1538.0 4.032300 0.010055 0 ... t1 Skull Ery 0 NaN NaN NaN NaN NaN 0
LL837_skull_TTTATGCAGCAACTTC LL837_skull 1 2036 9999.998047 2036 8584.0 112.0 1.304753 0.053785 0 ... t1 Skull Ery 0 NaN NaN NaN NaN NaN 0

112 rows × 22 columns

[190]:
import numpy as np
from sklearn.metrics import accuracy_score
from scipy.stats import pearsonr

# 将 Fate_bias 转换为二进制格式
adata_orig.obs['Fate_binary'] = adata_orig.obs['Fate_bias'].map({'MkP bias': 1, 'No MkP bias': 0})

# 删除包含 NaN 值的行,并只保留 GT 不为 2 的行
filtered_data = adata_orig.obs[['Fate_binary', 'GT']].dropna()
filtered_data = filtered_data[filtered_data['GT'] != '2']

# 获取 Fate_binary 和 GT 列
fate_binary = filtered_data['Fate_binary'].astype(int)
gt = filtered_data['GT'].astype(int)

# 计算准确率
accuracy = accuracy_score(gt, fate_binary)
print(f'Accuracy: {accuracy}')

# 计算皮尔森相关系数
pearson_corr, _ = pearsonr(gt, fate_binary)
print(f'Pearson Correlation: {pearson_corr}')

Accuracy: 0.6176470588235294
Pearson Correlation: 0.40683221630902433
[782]:
import numpy as np

# 获取 X_clone 矩阵
X_clone = adata_orig.obsm['X_clone']

# 确保 X_clone 是一个稀疏矩阵
if isinstance(X_clone, np.ndarray):
    X_clone = ssp.csr_matrix(X_clone)

# 为每个细胞分配克隆值
# 假设每个细胞属于一个克隆,这里取 X_clone 的非零列索引作为克隆值
clones = X_clone.argmax(axis=1).A.flatten()

# 将克隆值赋给 obs 中的 clone 属性
adata_orig.obs['clone'] = clones

# 打印前几行查看结果
print(adata_orig.obs[['clone']].head())

   clone
0      0
1      0
2      0
3      0
4      0
[783]:
import pandas as pd
adata_orig.obs['GT']=adata_orig.obs['GT'].astype(str)
# 给 adata_orig 按照数字顺序设置索引
adata_orig.obs = adata_orig.obs.reset_index(drop=True)
adata_orig.obs.index = range(len(adata_orig.obs))


# 筛选 GT 为 '0' 或 '1' 的细胞
selected_cells = adata_orig.obs[adata_orig.obs['GT'].isin(['0', '1'])]

# 初始化结果列表
results = []

# 遍历每个克隆
for clone in selected_cells['clone'].unique():
    clone_cells = adata_orig.obs[adata_orig.obs['clone'] == clone]

    # 找到 state_info 为 HSC 且时间点为 t0 的细胞
    from_cells = clone_cells[(clone_cells['state_info'] == 'HSC') & (clone_cells['time_info'] == 't0')]

    # 找到其他类型细胞且时间点为 t1 的细胞
    to_cells = clone_cells[(clone_cells['time_info'] == 't1') & (clone_cells['state_info'] != 'HSC')]

    # 记录 from_index, to_index, from_cell_type, to_cell_type
    for from_idx in from_cells.index:
        for to_idx in to_cells.index:
            results.append({
                'from_index': from_idx,
                'to_index': to_idx,
                'from_cell_type': adata_orig.obs.at[from_idx, 'state_info'],
                'to_cell_type': adata_orig.obs.at[to_idx, 'state_info'],
                'clone':clone,
            })

# 创建 DataFrame
df = pd.DataFrame(results)

# 显示结果
print(df.head())

   from_index  to_index from_cell_type to_cell_type  clone
0          13      5344            HSC          MkP    988
1        3445        23            HSC          Ery    307
2        3445      1600            HSC          Ery    307
3        3445      3757            HSC          Ery    307
4          90       278            HSC          MkP    805
[785]:
df['clone'].unique().shape
[785]:
(47,)
[418]:

dflist_index = [df2 for _, df2 in df.groupby(['clone'])] np.random.shuffle(dflist_index) len(dflist_index)*0.7 dflist_index1=dflist_index[:33] #36 dflist_index2=dflist_index[33:] dflist_index1=pd.concat(dflist_index1) dflist_index2=pd.concat(dflist_index2) print(len(dflist_index1)) print(len(dflist_index2))
60
24
/tmp/ipykernel_67646/1345040238.py:1: FutureWarning: In a future version of pandas, a length 1 tuple will be returned when iterating over a groupby with a grouper equal to a list of length 1. Don't supply a list with a single grouper to avoid this warning.
  dflist_index = [df2 for _, df2 in df.groupby(['clone'])]
[419]:

dic_clone={} for i in df.values: clone=i[-1] dic_clone[clone]=[] for i in df.values: clone=i[-1] dic_clone[clone].append(i[1])
[372]:
dic={'Neu':0, 'Ery':0, 'MkP':1, 'Mon':0 , 'Baso':0}
traincell1=[]
traincell2=[]
train_rel=[]
testcell1=[]
testcell2=[]
test_rel=[]
for j in range(1):
    cell1=dflist_index1.values.T[0]
    cell2=dflist_index1.values.T[1]
    deep=dflist_index1.values.T[3]
    traincell1+=list(cell1)
    traincell2+=list(cell2)
    for i in deep:
        train_rel.append(dic[i])
    cell1=dflist_index2.values.T[0]
    cell2=dflist_index2.values.T[1]
    deep=dflist_index2.values.T[3]
    testcell1+=list(cell1)
    testcell2+=list(cell2)
    for i in deep:
        test_rel.append(dic[i])
[603]:
# dic={'Neu':0, 'Ery':0, 'MkP':1, 'Mon':0}
# traincell1=[]
# traincell2=[]
# train_rel=[]
# testcell1=[]
# testcell2=[]
# test_rel=[]
# for j in range(100):
#     cell1=dflist_index1.values.T[0]
#     cell2=dflist_index1.values.T[1]
#     deep=dflist_index1.values.T[3]
#     traincell1+=list(cell1)
#     traincell2+=list(cell2)
#     for i in deep:
#         train_rel.append(dic[i])
#     cell1=dflist_index2.values.T[0]
#     cell2=dflist_index2.values.T[1]
#     deep=dflist_index2.values.T[3]
#     testcell1+=list(cell1)
#     testcell2+=list(cell2)
#     for i in deep:
#         test_rel.append(dic[i])

import numpy as np

dic = {'Neu': 0, 'Ery': 0, 'MkP': 1, 'Mon': 0}
traincell1 = []
traincell2 = []
train_rel = []
testcell1 = []
testcell2 = []
test_rel = []

for j in range(100):
    cell1 = dflist_index1.values.T[0]
    cell2 = dflist_index1.values.T[1]
    deep = dflist_index1.values.T[3]
    traincell1 += list(cell1)
    traincell2 += list(cell2)
    for i in deep:
        train_rel.append(dic[i])
    cell1 = dflist_index2.values.T[0]
    cell2 = dflist_index2.values.T[1]
    deep = dflist_index2.values.T[3]
    testcell1 += list(cell1)
    testcell2 += list(cell2)
    for i in deep:
        test_rel.append(dic[i])

# 将训练集的样本转换为 numpy 数组
traincell1 = np.array(traincell1)
traincell2 = np.array(traincell2)
train_rel = np.array(train_rel)

# 找到所有标签为1的样本的索引
indices_of_ones = np.where(train_rel == 1)[0]

# 重采样4倍
resampled_traincell1 = np.concatenate([traincell1] + [traincell1[indices_of_ones]] *1)
resampled_traincell2 = np.concatenate([traincell2] + [traincell2[indices_of_ones]] * 1)
resampled_train_rel = np.concatenate([train_rel] + [train_rel[indices_of_ones]] * 1)

# 转换回列表形式
traincell1 = resampled_traincell1.tolist()
traincell2 = resampled_traincell2.tolist()
train_rel = resampled_train_rel.tolist()


[604]:
##No relation的生成
num=50
from_index=list(set(traincell1))
for i in range(len(set(from_index))):
    print(i)
    a=[]
    a.append(from_index[i])
    cell1=np.random.choice(a,num,replace=True)
    cell1=list(cell1)
    cell1_clone=df[df['from_index']==from_index[i]]['clone'].values[0]
    cell2_choice=set(df['to_index'].unique())-set(dic_clone[cell1_clone])
    cell2=np.random.choice(list(set(cell2_choice)),num,replace=False)
    cell2=list(cell2)
    rel=np.random.choice([2],num,replace=True)
    rel=list(rel)
    traincell1+=cell1
    traincell2+=cell2
    train_rel+=rel

num=50
from_index=list(set(testcell1))
for i in range(len(set(from_index))):
    print(i)
    a=[]
    a.append(from_index[i])
    cell1=np.random.choice(a,num,replace=True)
    cell1=list(cell1)
    cell1_clone=df[df['from_index']==from_index[i]]['clone'].values[0]
    cell2_choice=set(df['to_index'].unique())-set(dic_clone[cell1_clone])

    cell2=np.random.choice(list(set(cell2_choice)),num,replace=False)
    cell2=list(cell2)
    rel=np.random.choice([2],num,replace=True)
    rel=list(rel)

    testcell1+=cell1
    testcell2+=cell2
    test_rel+=rel

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
[605]:
dic_train={}
dic_test={}
for i in train_rel:
    dic_train[i] = dic_train.get(i, 0) + 1
print(dic_train)
for i in test_rel:
    dic_test[i] = dic_test.get(i, 0) + 1
print(dic_test)
{0: 3500, 1: 5000, 2: 2400}
{0: 1700, 1: 700, 2: 1000}
[452]:
traincell1=np.loadtxt('/home/zhengtuo/songtao/3traincell1.txt')
traincell2=np.loadtxt('/home/zhengtuo/songtao/3traincell2.txt')
testcell1=np.loadtxt('/home/zhengtuo/songtao/3testcell1.txt')
testcell2=np.loadtxt('/home/zhengtuo/songtao/3testcell2.txt')
train_rel=np.loadtxt('/home/zhengtuo/songtao/3trainrel.txt',dtype=int)
test_rel=np.loadtxt('/home/zhengtuo/songtao/3testrel.txt',dtype=int)
[606]:
np.savetxt('/home/zhengtuo/songtao/3traincell1.txt',traincell1)
np.savetxt('/home/zhengtuo/songtao/3traincell2.txt',traincell2)
np.savetxt('/home/zhengtuo/songtao/3testcell1.txt',testcell1)
np.savetxt('/home/zhengtuo/songtao/3testcell2.txt',testcell2)
np.savetxt('/home/zhengtuo/songtao/3trainrel.txt',train_rel,fmt='%d')
np.savetxt('/home/zhengtuo/songtao/3testrel.txt',test_rel,fmt='%d')

[347]:
adata_orig=cs.hf.read("/home/zhengtuo/songtao/DestinyNet/darlin_6.24_processed.h5ad")
adata_orig
[347]:
AnnData object with n_obs × n_vars = 5487 × 21114
    obs: 'library', 'batch', 'n_genes', 'n_counts', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'doublet_score', 'predicted_doublet', 'cell_type', 'state_info', 'time_info', 'tissue', 'state_info_old', 'fate', 'fate_map_transition_map_MkP', 'fate_map_transition_map_Ery', 'fate_map_transition_map_Neu', 'fate_map_transition_map_Mon', 'Fate_bias', 'GT', 'index'
    var: 'n_cells', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable'
    uns: 'GT_colors', 'Tmap_cell_id_t1', 'Tmap_cell_id_t2', 'available_map', 'cell_type_colors', 'clonal_cell_id_t1', 'clonal_cell_id_t2', 'clonal_time_points', 'clone_id', 'data_des', 'fate_map_params', 'fate_potency_tmp', 'intraclone_transition_map', 'library_colors', 'neighbors', 'scrublet', 'sp_idx', 'time_info_colors', 'time_ordering', 'tissue_colors', 'transition_map', 'umap'
    obsm: 'X_clone', 'X_emb', 'X_pca', 'X_umap'
    obsp: 'connectivities', 'distances'
[102]:
sc.write('/home/zhengtuo/songtao/DestinyNet/darlin_6.24_processed.h5ad',adata_orig)
adata_orig.obs
[102]:
library batch n_genes n_counts n_genes_by_counts total_counts total_counts_mt pct_counts_mt doublet_score predicted_doublet ... tissue state_info_old fate fate_map_transition_map_MkP fate_map_transition_map_Ery fate_map_transition_map_Neu fate_map_transition_map_Mon Fate_bias GT index
LL837_skull_AAACCCAAGAGTCACG LL837_skull 1 2182 10000.000000 2182 8330.0 566.0 6.794718 0.043403 0 ... Skull Mon 2 NaN NaN NaN NaN NaN NaN 0
LL837_skull_AAACCCAAGTGAACAT LL837_skull 1 2247 10000.000000 2247 10328.0 128.0 1.239349 0.048237 0 ... Skull Ery 2 NaN NaN NaN NaN NaN NaN 1
LL837_skull_AAACCCACATCCGTTC LL837_skull 1 4151 9999.999023 4150 29156.0 420.0 1.440527 0.012388 0 ... Skull Mon 2 NaN NaN NaN NaN NaN NaN 2
LL837_skull_AAACCCAGTCCACATA LL837_skull 1 1515 10000.000000 1515 6034.0 146.0 2.419622 0.146524 0 ... Skull Meg 2 NaN NaN NaN NaN NaN NaN 3
LL837_skull_AAACCCAGTGAACGGT LL837_skull 1 1782 10000.000000 1782 6942.0 108.0 1.555748 0.022658 0 ... Skull Meg 2 NaN NaN NaN NaN NaN NaN 4
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
LL837_skull_TTTGGTTTCATGCCCT LL837_skull 1 4073 9999.999023 4073 29120.0 474.0 1.627747 0.019994 0 ... Skull Ery 2 NaN NaN NaN NaN NaN NaN 5482
LL837_skull_TTTGGTTTCGACGTCG LL837_skull 1 3697 10000.000977 3697 26760.0 632.0 2.361734 0.016280 0 ... Skull Neu 2 NaN NaN NaN NaN NaN NaN 5483
LL837_skull_TTTGTTGAGCGTTACT LL837_skull 1 1476 10000.000000 1476 6594.0 286.0 4.337276 0.079240 0 ... Skull LMPP 2 0.074549 0.185234 0.142106 0.400063 NaN NaN 5484
LL837_skull_TTTGTTGAGTGATCGG LL837_skull 1 3283 9999.999023 3283 21940.0 274.0 1.248861 0.023810 0 ... Skull Neu 2 NaN NaN NaN NaN NaN NaN 5485
LL837_skull_TTTGTTGGTTACAGCT LL837_skull 1 817 10000.000977 817 2316.0 192.0 8.290155 0.031239 0 ... Skull LMPP 2 0.163187 0.226108 0.252171 0.273444 NaN NaN 5486

5487 rows × 23 columns

[200]:
all_cell=list(set(list(df['from_index'])+list(df['to_index'])))
all_cell=np.array(all_cell).astype(int)

[202]:
len(all_cell)
[202]:
128
[947]:
adata_orig.obs['index']=range(adata_orig.shape[0])

[348]:
#adata3=adata_orig[adata_orig.obs['index'].isin(all_cell)]
adata3=adata_orig#{'Neu': 0, 'Ery': 0, 'MkP': 1, 'Mon': 0}
adata3
[348]:
AnnData object with n_obs × n_vars = 5487 × 21114
    obs: 'library', 'batch', 'n_genes', 'n_counts', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'doublet_score', 'predicted_doublet', 'cell_type', 'state_info', 'time_info', 'tissue', 'state_info_old', 'fate', 'fate_map_transition_map_MkP', 'fate_map_transition_map_Ery', 'fate_map_transition_map_Neu', 'fate_map_transition_map_Mon', 'Fate_bias', 'GT', 'index'
    var: 'n_cells', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable'
    uns: 'GT_colors', 'Tmap_cell_id_t1', 'Tmap_cell_id_t2', 'available_map', 'cell_type_colors', 'clonal_cell_id_t1', 'clonal_cell_id_t2', 'clonal_time_points', 'clone_id', 'data_des', 'fate_map_params', 'fate_potency_tmp', 'intraclone_transition_map', 'library_colors', 'neighbors', 'scrublet', 'sp_idx', 'time_info_colors', 'time_ordering', 'tissue_colors', 'transition_map', 'umap'
    obsm: 'X_clone', 'X_emb', 'X_pca', 'X_umap'
    obsp: 'connectivities', 'distances'
[747]:
adata3=adata_orig[adata_orig.obs['state_info'].isin(['Neu','Ery','MkP','Mon',"HSC"])].copy()
adata3.obs
[747]:
library batch n_genes n_counts n_genes_by_counts total_counts total_counts_mt pct_counts_mt doublet_score predicted_doublet cell_type state_info time_info tissue state_info_old
LL837_LF_AAACCCATCTTCGGTC LL837_LF 0 1146 10000.001953 1146 3210.0 16.0 0.498442 0.038168 0 HSC HSC HSC LF HSC
LL837_LF_AAACGAAGTTGTGCCG LL837_LF 0 3678 10000.000000 3677 19478.0 336.0 1.725023 0.039152 0 HSC HSC HSC LF HSC
LL837_LF_AAACGAATCGAGATAA LL837_LF 0 6453 10000.000000 6453 60940.0 1362.0 2.234985 0.015856 0 HSC HSC HSC LF HSC
LL837_LF_AAACGCTTCCATATGG LL837_LF 0 4972 10000.000000 4972 33454.0 700.0 2.092425 0.028299 0 HSC HSC HSC LF HSC
LL837_LF_AAACGCTTCGGAATGG LL837_LF 0 5806 10000.000000 5805 46320.0 1076.0 2.322971 0.036286 0 HSC HSC HSC LF HSC
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
LL837_spleen_TTTGGAGTCGAACACT LL837_spleen 2 3637 10000.000000 3637 20822.0 250.0 1.200653 0.032829 0 Ery Ery Ery Spleen Ery
LL837_spleen_TTTGGTTCACGATAGG LL837_spleen 2 3173 9999.999023 3173 15726.0 190.0 1.208190 0.013109 0 Ery Ery Ery Spleen Ery
LL837_spleen_TTTGGTTCAGCAGTCC LL837_spleen 2 6974 10000.000977 6974 106512.0 3098.0 2.908592 0.015034 0 Ery Ery Ery Spleen Ery
LL837_spleen_TTTGTTGCACCCATAA LL837_spleen 2 3348 9999.999023 3348 17774.0 280.0 1.575335 0.034509 0 Ery Ery Ery Spleen Ery
LL837_spleen_TTTGTTGGTCACTTAG LL837_spleen 2 6442 10000.000977 6442 78968.0 1528.0 1.934961 0.003584 0 Ery Ery Ery Spleen Ery

7226 rows × 15 columns

[751]:

                           clone_N  clone_fraction
state_info
Baso                            38        0.018260
Baso,Dc                          1        0.000481
Baso,Ery                         9        0.004325
Baso,Ery,Neu                     2        0.000961
Baso,Ery,Neu,Meg,LMPP,Mon        1        0.000481
...                            ...             ...
nan,LMPP,Mon                     5        0.002403
nan,LMPP,Mon,Neu                 1        0.000481
nan,Mon                          6        0.002883
nan,Mon,Neu                      3        0.001442
nan,Neu                          5        0.002403

[215 rows x 2 columns]
-----------
            cell_N  clonal_cell_fraction
state_info
Baso           163              0.028254
Dc             202              0.035015
Ery            992              0.171954
HSC           1419              0.245970
LMPP           683              0.118391
Lym             22              0.003813
Meg            333              0.057722
Mon            621              0.107644
Neu            754              0.130699
nan            580              0.100537
add information at obs['clone_id']
[751]:
2081
[607]:
from math import sqrt

import torch
import torch.nn as nn
num_relations=3
len_geneExp=21114
len_embedding=256
learning_rate=0.0001
num_epoch=10
batchsize=512
class SELayer(nn.Module):
    def __init__(self, channel, reduction=16):
        super(SELayer, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool1d(1)
        self.fc = nn.Sequential(
            nn.Linear(channel, channel // reduction, bias=False),
            nn.LeakyReLU(inplace=False),
            nn.Linear(channel // reduction, channel, bias=False),
            nn.Sigmoid()
        )

    def forward(self, x):
        b, c, _ = x.size()
        #print('channel:',c)
        #print('size:',x.shape)
        y = self.avg_pool(x).view(b, c)
        y = self.fc(y).view(b, c, 1)
        #print('y_size:',y.shape)
        return x * y.expand_as(x)
class MutiheadAttention(nn.Module):
    def __init__(self, input_dim, dim_k, dim_v,num_heads):
        super(MutiheadAttention, self).__init__()
        self.dim_q = dim_k # 一般默认 Q=K
        self.dim_k = dim_k
        self.dim_v = dim_v
        self.num_units=dim_k
        self.num_heads=num_heads
        #定义线性变换函数
        self.linear_q = nn.Linear(input_dim, dim_k, bias=False)
        self.linear_k = nn.Linear(input_dim, dim_k, bias=False)
        self.linear_v = nn.Linear(input_dim, dim_v, bias=False)
        self._norm_fact = 1 / sqrt(dim_k)

    def forward(self, x):
        # x: batch_size, seq_len, input_dim
        q = self.linear_q(x)  # batch_size, seq_len, dim_k
        k = self.linear_k(x)  # batch_size, seq_len, dim_k
        v = self.linear_v(x)  # batch_size, seq_len, dim_v
        split_size = self.num_units // self.num_heads
        q = torch.stack(torch.split(q, split_size, dim=2), dim=0)  # [h, N, T_q, num_units/h]
        k = torch.stack(torch.split(k, split_size, dim=2), dim=0)  # [h, N, T_k, num_units/h]
        v = torch.stack(torch.split(v, split_size, dim=2), dim=0)  # [h, N, T_k, num_units/h]

        scores = torch.matmul(q, k.transpose(2, 3))
        scores = scores / (self.dim_k ** 0.5)

        scores = F.softmax(scores, dim=3)
        ## out = score * V
        out = torch.matmul(scores, v)  # [h, N, T_q, num_units/h]
        out = torch.cat(torch.split(out, 1, dim=0), dim=3).squeeze(0)

        return out

class ResidualBlock(torch.nn.Module):
    def __init__(self,channels):
        super(ResidualBlock,self).__init__()
        self.channels = channels

        self.conv1 = nn.Conv1d(channels,channels,kernel_size=3,padding=1)
        self.conv2 = nn.Conv1d(channels,channels,kernel_size=3,padding=1)
        self.se=SELayer(channels,16)
    def forward(self, x):
        y = F.relu(self.conv1(x))
        y = self.conv2(y)
        y=self.se(y)
        return F.relu(x+y)

class model(nn.Module):
    def __init__(self):
        nn.Module.__init__(self)
        self.att=MutiheadAttention(len_embedding*2,512,512,64)
        self.layernorm=nn.LayerNorm(512)
        self.conv1 = nn.Conv1d(1, 32, 4)  # 输入通道数为1,输出通道数为6
        self.relu1=nn.LeakyReLU(0.2, inplace=True)
        self.rblock1 = ResidualBlock(32)
        self.conv2 = nn.Conv1d(32,64, 4)  # 输入通道数为6,输出通道数为16
        self.batchn1=nn.BatchNorm1d(64)
        self.relu2= nn.LeakyReLU(0.2, inplace=True)
        self.rblock2 = ResidualBlock(64)
        self.conv3=nn.Conv1d(64,128,4)
        self.batchn2=nn.BatchNorm1d(128)
        self.relu3= nn.LeakyReLU(0.2, inplace=True)
        self.rblock3 = ResidualBlock(128)
        self.conv4=nn.Conv1d(128,256,4)
        self.batchn3=nn.BatchNorm1d(256)
        self.relu4= nn.LeakyReLU(0.2, inplace=True)
        self.rblock4 = ResidualBlock(256)
        self.dropout=nn.Dropout()
        self.fc1 = nn.Linear(7424, num_relations)

    def forward(self, x):
        # 输入x -> conv1 -> relu -> 2x2窗口的最大池化
        x=self.att(x)+x
        x=self.layernorm(x)
        x = self.conv1(x)
        x = self.relu1(x)
        x = F.max_pool1d(x, 2)
        x=self.rblock1(x)
        # # 输入x -> conv2 -> relu -> 2x2窗口的最大池化
        x = self.conv2(x)
        x=self.batchn1(x)
        x=self.relu2(x)
        x = F.max_pool1d(x, 2)
        x=self.rblock2(x)

        x = self.conv3(x)
        x=self.batchn2(x)
        x=self.relu3(x)
        x = F.max_pool1d(x, 2)
        x=self.rblock3(x)

        x = self.conv4(x)
        x=self.batchn3(x)
        x=self.relu4(x)
        x = F.max_pool1d(x, 2)
        x=self.rblock4(x)
        # # view函数将张量x变形成一维向量形式,总特征数不变,为全连接层做准备
        x = x.view(x.size()[0], -1)
        x=self.dropout(x)
        x=self.fc1(x)
        return x
geneEnc=nn.Sequential(
    nn.Dropout(),
    nn.Linear(len_geneExp, 100),
    nn.ReLU(),
            nn.Linear(100, 100),
            nn.ReLU(),
     nn.Linear(100, 100),
            nn.ReLU(),
    nn.Linear(100, 100),
            nn.ReLU(),
    nn.Linear(100, len_embedding),
)
geneDec = nn.Sequential(
    nn.Linear(len_embedding, len_geneExp),
    #nn.Linear(512, len_geneExp),

)
genemap = nn.Sequential(
    #nn.Dropout(),
     nn.Linear(len_embedding, 100),
     nn.BatchNorm1d(100),
    nn.ReLU(),
    nn.Linear(100, 100),
   nn.BatchNorm1d(100),
    nn.ReLU(),
    nn.Linear(100, 100),
   nn.BatchNorm1d(100),
    nn.ReLU(),
    nn.Linear(100, 100),
    nn.BatchNorm1d(100),
    nn.ReLU(),
    nn.Linear(100, len_embedding),

)


model=model()
device = torch.device("cuda:5")
device2 = torch.device("cuda:5")
path='/home/zhengtuo/songtao/darlin1.pth'
pretrained_weights = torch.load(path,'cuda:5')
model.load_state_dict(pretrained_weights['model'])
geneEnc.load_state_dict(pretrained_weights['geneEnc'])
genemap.load_state_dict(pretrained_weights['genemap'])
geneDec.load_state_dict(pretrained_weights['geneDec'])

model.to(device2)
geneEnc.to(device2)
geneDec.to(device2)
genemap.to(device2)


genemap.eval()
model.eval()
geneEnc.eval()
geneDec.eval()

[607]:
Sequential(
  (0): Linear(in_features=256, out_features=21114, bias=True)
)
[488]:
adata3.obsm['X_pca']=adata3.obsm['X_pca'].cpu().numpy()

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
Cell In [488], line 1
----> 1 adata3.obsm['X_pca']=adata3.obsm['X_pca'].cpu().numpy()

AttributeError: 'numpy.ndarray' object has no attribute 'cpu'
[608]:

Embeddings_path = '/home/zhengtuo/songtao/dynamic2.txt.npz' # txt dir to save cell embeddings from tqdm import trange geneEnc.to(device2) all_embeddings = [] with torch.no_grad(): for i in trange(adata3.shape[0]): x = 1*adata3.X[i].toarray() geneExp = torch.tensor(x).to(device2) geneExp=torch.reshape(geneExp,(1,21114)) geneExp = geneExp.to(torch.float32) geneEmbedding = geneEnc(geneExp) geneEmbedding = geneEmbedding.cpu().detach().numpy() # or any other data type all_embeddings.append(geneEmbedding) np.savez(Embeddings_path,all_embeddings) mapembedding = [] with torch.no_grad(): for i in trange(adata3.shape[0]): x = 1*adata3.X[i].toarray() geneExp = torch.tensor(x).to(device2) geneExp = geneExp.to(torch.float32) geneEmbedding = geneEnc(geneExp) geneEmbedding=genemap(geneEmbedding) geneEmbedding = geneEmbedding.cpu().detach().numpy() # or any other data type mapembedding.append(geneEmbedding) all_embeddings=np.array(all_embeddings) mapembedding=np.array(mapembedding) adata3.obsm['geneEnc']=all_embeddings adata3.obsm['genemap']=mapembedding #adata2.obsm['X_pca']=adata2.obsm['X_pca'].cpu().numpy()
100%|█████████████████████████████████████████████████████████████████| 4919/4919 [00:02<00:00, 1798.93it/s]
100%|█████████████████████████████████████████████████████████████████| 4919/4919 [00:04<00:00, 1105.89it/s]
[566]:
adata3 = adata3.copy()
dic1 = {'0': 'no mkp_bias', '1': 'mkp_bias'}
adata3.obs['GT'] = adata3.obs['GT'].astype(str).map(dic1)
[353]:
#adata3.obs.loc[adata3.obs['state_info'] != 'HSC', 'GT'] = None
adata3.obs['state_info']
[353]:
LL837_skull_AAACCCAAGAGTCACG     Mon
LL837_skull_AAACCCAAGTGAACAT     Ery
LL837_skull_AAACCCACATCCGTTC     Mon
LL837_skull_AAACCCAGTCCACATA     MkP
LL837_skull_AAACCCAGTGAACGGT     MkP
                                ...
LL837_skull_TTTGGTTTCATGCCCT     Ery
LL837_skull_TTTGGTTTCGACGTCG     Neu
LL837_skull_TTTGTTGAGCGTTACT    LMPP
LL837_skull_TTTGTTGAGTGATCGG     Neu
LL837_skull_TTTGTTGGTTACAGCT    LMPP
Name: state_info, Length: 5487, dtype: category
Categories (7, object): ['Baso', 'Ery', 'HSC', 'LMPP', 'MkP', 'Mon', 'Neu']
[511]:
adata3.obs['GT'].unique()
[511]:
array([nan], dtype=object)
[609]:
import numpy as np
import torch
import scanpy as sc
import matplotlib.pyplot as plt

# 创建figure
fig, ax = plt.subplots()
fig.set_size_inches(5, 5)

# 加载数据
n = np.load('/home/zhengtuo/songtao/dynamic2.txt.npz')
n = torch.tensor(n['arr_0'])
n = torch.reshape(n, (4919, 256))
adata3.obsm["X_pca"] = n

# 进行UMAP嵌入
cs.pp.get_X_emb(adata3, n_neighbors=20, umap_min_dist=1)

# 将 'Mon', 'Neu', 'Ery' 定义为 'Non-MkP'
#adata3.obs['state_info'] = adata3.obs['state_info'].replace({'Mon': 'Non-MkP', 'Neu': 'Non-MkP', 'Ery': 'Non-MkP'})

# 定义颜色映射
state_colors = {
    'HSC': '#E6DCD9',   # 蓝色
    'MkP': '#1f77b4',   #
    'Non-MkP': '#2ca02c'  # 绿色橙色
}

# 绘制UMAP图
sc.pl.umap(adata3, s=50, color="state_info", ax=ax)

# 将X_pca数据转换为numpy数组
adata3.obsm['X_pca'] = adata3.obsm['X_pca'].cpu().numpy()
fig, ax = plt.subplots()
fig.set_size_inches(5, 5)
# 绘制仅包含'GT'为'0'或'1'的UMAP图
GT_colors= {
    'HSC': '#ff7f0e',   # 蓝色
    'mkp_bias': '#1f77b4',   #
    'no mkp_bias': '#2ca02c'  # 绿色橙色
}

sc.pl.umap(adata3, color="GT", s=100,ax=ax,palette=GT_colors)
ax.figure.savefig('/home/zhengtuo/songtao/DestinyNet/rebuttal6.1/darlin_fate_GT_score0.25.pdf', bbox_inches='tight')

/home/zhengtuo/miniconda3/envs/jst/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(
../_images/notebooks_Darlin_77_1.png
/home/zhengtuo/miniconda3/envs/jst/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(
../_images/notebooks_Darlin_77_3.png
[ ]:
sc.pl.umap(adata3, s=20, color="state_info")

[ ]:

[975]:
import sklearn
sklearn.metrics.silhouette_score(
    adata3[adata3.obs['GT'].isin(['mkp_bias','no mkp_bias'])].obsm["X_umap"],
    adata3[adata3.obs['GT'].isin(['mkp_bias','no mkp_bias'])].obs['GT']
)
[975]:
0.25367096
[386]:
adata_orig.obs['index']=adata_orig.obs['index'].astype(int)
[387]:
adata_orig[adata_orig.obs['state_info']=='HSC'].obs['index'].unique()
adata_orig[adata_orig.obs['time_info']=='t1'].obs['state_info'].unique()

[387]:
['Mon', 'Ery', 'MkP', 'Neu', 'Baso']
Categories (5, object): ['Baso', 'Ery', 'MkP', 'Mon', 'Neu']
[439]:

[439]:
array(['nan', '1', '0'], dtype=object)
[610]:
dic_true={}
for i in range((adata_orig.shape[0])):
    if(adata_orig.obs['GT'][i]=='1'):
        dic_true[adata_orig.obs['index'][i]]=1
    elif(adata_orig.obs['GT'][i]=='0'):
        dic_true[adata_orig.obs['index'][i]]=0
    else:
        dic_true[adata_orig.obs['index'][i]]=2
#all_cell1=list(set(list(traincell1)+list(testcell1)))
#all_cell2=list(set(list(traincell2)+list(testcell2)))
all_cell1=adata_orig[adata_orig.obs['state_info']=='HSC'].obs['index'].unique()
all_cell2=adata_orig[adata_orig.obs['time_info']=='t1'].obs['index'].unique()
all_cell2=list(set(list(traincell2)+list(testcell2)))
from torch.utils.data import Dataset, DataLoader
class UndiffDataset(Dataset):
    # 构造器初始化方法
    def __init__(self,length):
        self.length=length

    # 重写getitem方法用于通过idx获取数据内容
    def __getitem__(self, idx):
        i=int(list((all_cell1))[idx])
        gene=adata_orig.X[i].toarray()
        gene=torch.tensor(gene)
        true_fate=dic_true[i]
        #genetype=dic1[genetype]
        true_fate=torch.tensor(true_fate)
        return gene,true_fate

    # 重写len方法获取数据集大小
    def __len__(self):
        return self.length
class otherDataset(Dataset):
    # 构造器初始化方法
    def __init__(self,length):
        self.length=length

    # 重写getitem方法用于通过idx获取数据内容
    def __getitem__(self, idx):
        i=int(list((all_cell2))[idx])
        gene=adata_orig.X[i].toarray()
        gene=torch.tensor(gene)
        genetype=adata_orig.obs['state_info'][i]
        #genetype=dic[genetype]
        #genetype=torch.tensor(genetype)
        return gene,1

    # 重写len方法获取数据集大小
    def __len__(self):
        return self.length
def dup_rows(a, indx, num_dups=1):
    return np.insert(a,[indx+1]*num_dups,a[indx],axis=0)


unidffdataset = UndiffDataset(len(all_cell1))
otherdataset=otherDataset(len(all_cell2))
Undiffdataloader = DataLoader(unidffdataset, batch_size=len((all_cell1)), num_workers=0, shuffle=False, drop_last=False)
otherdataloader=DataLoader(otherdataset, batch_size=1, num_workers=0, shuffle=False, drop_last=False)
[611]:
import torch.nn.functional as F
true=0
dic={'Neu':0, 'Ery':0, 'MkP':1, 'Mon':0}
cudaid = 'cuda:5'
# dic={'Monocyte':0, 'Neutrophil':1, 'Lymphoid':2,'Mast':3, 'Ccr7_DC':4,
#        'Baso':5, 'Eos':6}
device2 = torch.device(cudaid)
geneEnc.to(device2)
model.to(device2)
for i, onebatch1 in enumerate(Undiffdataloader):
    cell1=onebatch1[0].to(device2)
    cell1=geneEnc(cell1)
    hang=cell1.shape[0]
    #cell1=torch.reshape(cell1,(hang,256))
    true_labels=onebatch1[1]
    #print(cell1)
    #print(true_labels)
    pre=torch.zeros(hang)
    pre_labels=torch.zeros((hang,3))
    print(i)
    for j, onebatch2 in enumerate(otherdataloader):
        print(j)
        cell2=onebatch2[0].to(device2)
        cell2=geneEnc(cell2)
        cell2=dup_rows(cell2.cpu().detach().numpy(), indx=0,num_dups=hang-1)
        cell2=torch.tensor(cell2).to(device2)
        #genetype=onebatch2[1]
        geneEmbs = torch.cat((cell1, cell2), 2)
        outputs = model(geneEmbs)
        outputs=torch.reshape(outputs,(hang,3))
        #print(outputs)
        outputs=outputs.argmax(-1)
        #print(outputs)
        for num in range(hang):
            if(int(outputs[num])!=2):
                pre_labels[num][int(outputs[num])]+=1
        del outputs,cell2,geneEmbs
        torch.cuda.empty_cache()
    matrix1=pre_labels
    #print(pre_labels)
    #print(pre_labels)
    pre_labels=np.argsort(-pre_labels)
    #print(pre_labels)
    pre_labels=pre_labels.T[0]
    #print(pre_labels)
    true+=np.count_nonzero(pre_labels==true_labels)
    #np.savetxt('/home/songtao/matrix.txt',matrix1)
    del cell1
    torch.cuda.empty_cache()

0
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
[462]:
true_labels=np.array(true_labels)
pre_labels=np.array(pre_labels)

mask = (true_labels != 2)

filtered_true_labels = true_labels[mask]
filtered_pre_labels = pre_labels[mask]

# 统计相等的数量
true = np.count_nonzero(filtered_pre_labels == filtered_true_labels)

print("统计数量:", true)
统计数量: 58
[ ]:

[612]:
true/len(list(set(list(traincell1)+list(testcell1))))
[612]:
0.8529411764705882
[901]:
from scipy.stats import pearsonr
correlation, _ = pearsonr(pre_labels, true_labels)
print(f'Correlation between true labels and predicted labels: {correlation:.4f}')
Correlation between true labels and predicted labels: 0.5448
[613]:
adata3.obs['index']=adata3.obs['index'].astype(int)
all_cell1=np.array(all_cell1).astype(int)
item=list((all_cell1))
dic_fate={}
dic1={0:'no mkp_bias',1:'mkp_bias',2:None}
#dic1={0:'multilineage',1:'myelo-erythroid restricted'}
for i in range(len(item)):
    dic_fate[item[i]]=dic1[int(pre_labels[i])]
adata3.obs['fate_pred']=None
adata3.obs['fate_pred']=adata3.obs['index'].map(dic_fate)
adata3.obs
[613]:
library batch n_genes n_counts n_genes_by_counts total_counts total_counts_mt pct_counts_mt doublet_score predicted_doublet ... fate fate_map_transition_map_MkP fate_map_transition_map_Ery fate_map_transition_map_Neu fate_map_transition_map_Mon Fate_bias GT index clone fate_pred
0 LL837_skull 1 2182 10000.000000 2182 8330.0 566.0 6.794718 0.043403 0 ... 2 NaN NaN NaN NaN NaN NaN 0 0 NaN
1 LL837_skull 1 2247 10000.000000 2247 10328.0 128.0 1.239349 0.048237 0 ... 2 NaN NaN NaN NaN NaN NaN 1 0 NaN
2 LL837_skull 1 4151 9999.999023 4150 29156.0 420.0 1.440527 0.012388 0 ... 2 NaN NaN NaN NaN NaN NaN 2 0 NaN
3 LL837_skull 1 1515 10000.000000 1515 6034.0 146.0 2.419622 0.146524 0 ... 2 NaN NaN NaN NaN NaN NaN 3 0 NaN
4 LL837_skull 1 1782 10000.000000 1782 6942.0 108.0 1.555748 0.022658 0 ... 2 NaN NaN NaN NaN NaN NaN 4 0 NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
5480 LL837_skull 1 4018 9999.999023 4018 28538.0 532.0 1.864181 0.019007 0 ... 2 NaN NaN NaN NaN NaN NaN 5480 0 NaN
5481 LL837_skull 1 3561 10000.000000 3561 22548.0 534.0 2.368281 0.015441 0 ... 2 NaN NaN NaN NaN NaN NaN 5481 73 NaN
5482 LL837_skull 1 4073 9999.999023 4073 29120.0 474.0 1.627747 0.019994 0 ... 2 NaN NaN NaN NaN NaN NaN 5482 0 NaN
5483 LL837_skull 1 3697 10000.000977 3697 26760.0 632.0 2.361734 0.016280 0 ... 2 NaN NaN NaN NaN NaN NaN 5483 412 NaN
5485 LL837_skull 1 3283 9999.999023 3283 21940.0 274.0 1.248861 0.023810 0 ... 2 NaN NaN NaN NaN NaN NaN 5485 344 NaN

4919 rows × 25 columns

[663]:
adata3.obs['prob']=None
sum=0
for i in all_cell1:
    adata3.obs.loc[adata3.obs['index']==i, 'prob'] = float(matrix1[sum][1]/(matrix1[sum][1]+matrix1[sum][0]))
    sum+=1
# 假设之前的代码已经定义并计算了 prob 和 dic_fate

# 初始化 fate_pred 列

# adata3.obs['fate_pred'] = None

# # 仅在 prob > 0.8 或 prob < 0.2 时更新 fate_pred
# mask_high = adata3.obs['prob'] > 0.8
# mask_low = adata3.obs['prob'] < 0.2

# # 更新 fate_pred
# adata3.obs.loc[mask_high, 'fate_pred'] = adata3.obs['index'].map(dic_fate)
# adata3.obs.loc[mask_low, 'fate_pred'] = adata3.obs['index'].map(dic_fate)

[682]:
import matplotlib.pyplot as plt
import scanpy as sc
GT_colors= {
    'HSC': '#E6DCD9',   # 蓝色
    'mkp_bias': '#1f77b4',   #
    'no mkp_bias': '#2ca02c'  # 绿色橙色
}
cospar_colors= {
    'HSC': '#E6DCD9',   # 蓝色
    'MkP bias': '#1f77b4',   #
    'No MkP bias': '#2ca02c'  # 绿色橙色
}
state_colors = {
    'HSC': '#E6DCD9',   # 蓝色
    'MkP': '#1f77b4',   #
    'Ery': '#9B724C' , # 绿色橙色
     'Baso': '#279E8F',
    'Mon':'#50C3C6',
    'Neu':'#FAA339',
}#'#2ca02c','#F7F7F7','#1f77b4'
# 创建figure
fig, ax = plt.subplots()
fig.set_size_inches(5, 5)
adata_plot=adata3[adata3.obs['state_info']=='HSC']
# 绘制UMAP图
sc.pl.umap(adata_plot, color='fate_pred', s=30, show=False, ax=ax, palette=GT_colors)
ax.figure.savefig('/home/zhengtuo/songtao/DestinyNet/rebuttal6.1/darlin_fate_prd.pdf', bbox_inches='tight')

# 创建第二个figure
fig, ax = plt.subplots()
fig.set_size_inches(5, 5)

# 绘制UMAP图
sc.pl.umap(adata3, color='state_info', s=30, show=False, ax=ax,palette=state_colors)
ax.figure.savefig('/home/zhengtuo/songtao/DestinyNet/rebuttal6.1/darlin_fate_clustering.pdf', bbox_inches='tight')

fig, ax = plt.subplots()
fig.set_size_inches(5, 5)
sc.pl.umap(adata[adata.obs['Fate_bias'].isin(['No MkP bias','MkP bias'])], color='Fate_bias', s=30, show=False, ax=ax,palette=cospar_colors)
ax.figure.savefig('/home/zhengtuo/songtao/DestinyNet/rebuttal6.1/darlin_fate_pred_cospar.pdf', bbox_inches='tight')
fig, ax = plt.subplots()
fig.set_size_inches(5, 5)
sc.pl.umap(adata[adata.obs['state_info'].isin(['Neu','Ery','MkP','Mon',"HSC"])], color='state_info', s=30, show=False, ax=ax,palette=state_colors)
ax.figure.savefig('/home/zhengtuo/songtao/DestinyNet/rebuttal6.1/darlin_fate_clustering_cospar.pdf', bbox_inches='tight')

/home/zhengtuo/miniconda3/envs/jst/lib/python3.9/site-packages/anndata/compat/_overloaded_dict.py:106: ImplicitModificationWarning: Trying to modify attribute `._uns` of view, initializing view as actual.
  self.data[key] = value
/home/zhengtuo/miniconda3/envs/jst/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(
/home/zhengtuo/miniconda3/envs/jst/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(
/home/zhengtuo/miniconda3/envs/jst/lib/python3.9/site-packages/anndata/compat/_overloaded_dict.py:106: ImplicitModificationWarning: Trying to modify attribute `._uns` of view, initializing view as actual.
  self.data[key] = value
/home/zhengtuo/miniconda3/envs/jst/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(
/home/zhengtuo/miniconda3/envs/jst/lib/python3.9/site-packages/anndata/compat/_overloaded_dict.py:106: ImplicitModificationWarning: Trying to modify attribute `._uns` of view, initializing view as actual.
  self.data[key] = value
/home/zhengtuo/miniconda3/envs/jst/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(
../_images/notebooks_Darlin_92_1.png
../_images/notebooks_Darlin_92_2.png
../_images/notebooks_Darlin_92_3.png
../_images/notebooks_Darlin_92_4.png
[641]:
# cs.pl.plot_adata_with_prefered_order(adata,'Fate_bias',plot_order=['LMPP','No MkP bias','MkP bias'],
#                               palette={'LMPP':'#d9d9d9','No MkP bias':'#ff7f0e','MkP bias':'#1f77b4','Late MkP bias':'grey'},
#                               linewidth = 0,s=30,background_size=5)
[670]:
adata.obs['state_info']
[670]:
LL837_skull_AAACCCAAGAGTCACG     Mon
LL837_skull_AAACCCAAGTGAACAT     Ery
LL837_skull_AAACCCACATCCGTTC     Mon
LL837_skull_AAACCCAGTCCACATA     MkP
LL837_skull_AAACCCAGTGAACGGT     MkP
                                ...
LL837_skull_TTTGGTTTCATGCCCT     Ery
LL837_skull_TTTGGTTTCGACGTCG     Neu
LL837_skull_TTTGTTGAGCGTTACT    LMPP
LL837_skull_TTTGTTGAGTGATCGG     Neu
LL837_skull_TTTGTTGGTTACAGCT    LMPP
Name: state_info, Length: 5487, dtype: category
Categories (7, object): ['Baso', 'Ery', 'HSC', 'LMPP', 'MkP', 'Mon', 'Neu']
[621]:
adata3.obs['GT'].value_counts()
[621]:
no mkp_bias    665
mkp_bias       363
Name: fate_pred, dtype: int64
[622]:
adata_orig.obs['GT'].value_counts()
[622]:
nan    5419
0        43
1        25
Name: GT, dtype: int64
[772]:

adata_orig.obs['fate_pred'] = None # 将 adata3 中的 fate_pred 值赋给 adata_orig 中对应的细胞 # 假设 adata3.obs 有与 adata_orig.obs 相同的索引 adata_orig.obs.loc[adata3.obs.index, 'fate_pred'] = adata3.obs['fate_pred'] # 显示前几行查看结果 print(adata_orig.obs['fate_pred'].head())
0    None
1    None
2    None
3    None
4    None
Name: fate_pred, dtype: object
[773]:
sc.pl.umap(adata_orig,color='GT',s=30)

/home/zhengtuo/miniconda3/envs/jst/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(
../_images/notebooks_Darlin_98_1.png
[673]:
def compute_velocity_on_grid(
    X_emb,
    M_emb,
    density=None,
    smooth=None,
    n_neighbors=None,
    min_mass=None,
    autoscale=True,
    adjust_for_stream=False,
    cutoff_perc=None,
):
    # remove invalid cells
    idx_valid = np.isfinite(X_emb.sum(1) + M_emb.sum(1))
    X_emb = X_emb[idx_valid]
    M_emb = M_emb[idx_valid]

    # prepare grid
    n_obs, n_dim = X_emb.shape
    density = 0.5 if density is None else density
    smooth = 0.5 if smooth is None else smooth

    grs = []
    for dim_i in range(n_dim):
        m, M = np.min(X_emb[:, dim_i]), np.max(X_emb[:, dim_i])
        m = m - 0.01 * np.abs(M - m)
        M = M + 0.01 * np.abs(M - m)
        gr = np.linspace(m, M, int(50 * density))
        grs.append(gr)

    meshes_tuple = np.meshgrid(*grs)
    X_grid = np.vstack([i.flat for i in meshes_tuple]).T

    # estimate grid velocities
    if n_neighbors is None:
        n_neighbors = int(n_obs / 50)
    nn = NearestNeighbors(n_neighbors=n_neighbors, n_jobs=-1)
    nn.fit(X_emb)
    dists, neighs = nn.kneighbors(X_grid)

    scale = np.mean([(g[1] - g[0]) for g in grs]) * smooth
    weight = normal.pdf(x=dists, scale=scale)
    p_mass = weight.sum(1)

    V_grid = (M_emb[neighs] * weight[:, :, None]).sum(1)
    V_grid /= np.maximum(1, p_mass)[:, None]
    if min_mass is None:
        min_mass = 1

    if adjust_for_stream:
        X_grid = np.stack([np.unique(X_grid[:, 0]), np.unique(X_grid[:, 1])])
        ns = int(np.sqrt(len(V_grid[:, 0])))
        V_grid = V_grid.T.reshape(2, ns, ns)

        mass = np.sqrt((V_grid ** 2).sum(0))
        min_mass = 10 ** (min_mass - 6)  # default min_mass = 1e-5
        min_mass = np.clip(min_mass, None, np.max(mass) * 0.9)
        cutoff = mass.reshape(V_grid[0].shape) < min_mass

        if cutoff_perc is None:
            cutoff_perc = 5
        length = np.sum(np.mean(np.abs(M_emb[neighs]), axis=1), axis=1).T
        length = length.reshape(ns, ns)
        cutoff |= length < np.percentile(length, cutoff_perc)

        V_grid[0][cutoff] = np.nan
    else:
        min_mass *= np.percentile(p_mass, 99) / 100
        X_grid, V_grid = X_grid[p_mass > min_mass], V_grid[p_mass > min_mass]

        if autoscale:
            V_grid /= 3 * quiver_autoscale(X_grid, V_grid)

    return X_grid, V_grid

def quiver_autoscale(X_emb, V_emb):
    import matplotlib.pyplot as pl

    scale_factor = np.abs(X_emb).max()  # just so that it handles very large values
    fig, ax = pl.subplots()
    Q = ax.quiver(
        X_emb[:, 0] / scale_factor,
        X_emb[:, 1] / scale_factor,
        V_emb[:, 0],
        V_emb[:, 1],
        angles="xy",
        scale_units="xy",
        scale=None,
    )
    Q._init()
    fig.clf()
    pl.close(fig)
    return Q.scale / scale_factor
import numpy as np
from scipy.stats import norm as normal
from sklearn.neighbors import NearestNeighbors
import tqdm
sc.pp.neighbors(adata3)

X = adata3.obsm['geneEnc'][:,0,:]
M = adata3.obsm['genemap'][:,0,:]
N = adata3.obsp['connectivities']
E = adata3.obsm["X_umap"]
norm = lambda x: (x-min(x))/(max(x)-min(x))
nn = NearestNeighbors(n_neighbors=20)
nn.fit(X)
dE = []


for i in tqdm.trange(N.shape[0]):
    D,I = nn.kneighbors( np.array([M[i]]) )
    dE.append( (E[I][0] - E[i]).mean(0))

dE = np.vstack(dE)
E = E[adata3.obs['cell_type'] == 'HSC']
dE = dE[adata3.obs['cell_type'] == 'HSC']

gE, gdE = compute_velocity_on_grid(E,dE,density=2, adjust_for_stream=True,smooth=True)

fig,ax=createFig(figsize=(5, 5))
fig.set_size_inches(5,5)
palette = sc.pl._tools.scatterplots._get_palette(adata3, 'state_info')
ax.scatter(adata3.obsm["X_umap"][:,0],adata3.obsm["X_umap"][:,1],s=10, linewidths=0,c=list(map(lambda x: palette[x], adata3.obs['state_info'])))
stream_kwargs = {
        "linewidth": 0.5,
        "density": 1,
        "zorder": 3,
        "color": "black",
        "arrowsize":  1,

        "arrowstyle": "-|>",
        "maxlength":  10,
        "integration_direction":  "both",
}

ax.streamplot(gE[0],gE[1],gdE[0],gdE[1],**stream_kwargs)
plt.savefig("/home/zhengtuo/songtao/DestinyNet/rebuttal6.1/darlin_fateflow.pdf")
100%|██████████████████████████████████████████████████████████████████| 4919/4919 [00:16<00:00, 301.85it/s]
../_images/notebooks_Darlin_99_1.png
[723]:
pei_adata=adata3
def compute_velocity_on_grid(
    X_emb,
    M_emb,
    density=None,
    smooth=None,
    n_neighbors=None,
    min_mass=None,
    autoscale=True,
    adjust_for_stream=False,
    cutoff_perc=None,
):
    # remove invalid cells
    idx_valid = np.isfinite(X_emb.sum(1) + M_emb.sum(1))
    X_emb = X_emb[idx_valid]
    M_emb = M_emb[idx_valid]

    # prepare grid
    n_obs, n_dim = X_emb.shape
    density = 0.5 if density is None else density
    smooth = 0.5 if smooth is None else smooth

    grs = []
    for dim_i in range(n_dim):
        m, M = np.min(X_emb[:, dim_i]), np.max(X_emb[:, dim_i])
        m = m - 0.01 * np.abs(M - m)
        M = M + 0.01 * np.abs(M - m)
        gr = np.linspace(m, M, int(50 * density))
        grs.append(gr)

    meshes_tuple = np.meshgrid(*grs)
    X_grid = np.vstack([i.flat for i in meshes_tuple]).T

    # estimate grid velocities
    if n_neighbors is None:
        n_neighbors = int(n_obs / 50)
    nn = NearestNeighbors(n_neighbors=n_neighbors, n_jobs=-1)
    nn.fit(X_emb)
    dists, neighs = nn.kneighbors(X_grid)

    scale = np.mean([(g[1] - g[0]) for g in grs]) * smooth
    weight = normal.pdf(x=dists, scale=scale)
    p_mass = weight.sum(1)

    V_grid = (M_emb[neighs] * weight[:, :, None]).sum(1)
    V_grid /= np.maximum(1, p_mass)[:, None]
    if min_mass is None:
        min_mass = 1

    if adjust_for_stream:
        X_grid = np.stack([np.unique(X_grid[:, 0]), np.unique(X_grid[:, 1])])
        ns = int(np.sqrt(len(V_grid[:, 0])))
        V_grid = V_grid.T.reshape(2, ns, ns)

        mass = np.sqrt((V_grid ** 2).sum(0))
        min_mass = 10 ** (min_mass - 6)  # default min_mass = 1e-5
        min_mass = np.clip(min_mass, None, np.max(mass) * 0.9)
        cutoff = mass.reshape(V_grid[0].shape) < min_mass

        if cutoff_perc is None:
            cutoff_perc = 5
        length = np.sum(np.mean(np.abs(M_emb[neighs]), axis=1), axis=1).T
        length = length.reshape(ns, ns)
        cutoff |= length < np.percentile(length, cutoff_perc)

        V_grid[0][cutoff] = np.nan
    else:
        min_mass *= np.percentile(p_mass, 99) / 100
        X_grid, V_grid = X_grid[p_mass > min_mass], V_grid[p_mass > min_mass]

        if autoscale:
            V_grid /= 3 * quiver_autoscale(X_grid, V_grid)

    return X_grid, V_grid

def quiver_autoscale(X_emb, V_emb):
    import matplotlib.pyplot as pl

    scale_factor = np.abs(X_emb).max()  # just so that it handles very large values
    fig, ax = pl.subplots()
    Q = ax.quiver(
        X_emb[:, 0] / scale_factor,
        X_emb[:, 1] / scale_factor,
        V_emb[:, 0],
        V_emb[:, 1],
        angles="xy",
        scale_units="xy",
        scale=None,
    )
    Q._init()
    fig.clf()
    pl.close(fig)
    return Q.scale / scale_factor
import numpy as np
from scipy.stats import norm as normal
from sklearn.neighbors import NearestNeighbors
import tqdm
sc.pp.neighbors(pei_adata)

X = pei_adata.obsm['geneEnc'][:,0,:]
M = pei_adata.obsm['genemap'][:,0,:]
N = pei_adata.obsp['connectivities']
E = pei_adata.obsm["X_umap"]
norm = lambda x: (x-min(x))/(max(x)-min(x))
nn = NearestNeighbors(n_neighbors=5)
nn.fit(X)
dE = []


for i in tqdm.trange(N.shape[0]):
    D,I = nn.kneighbors( np.array([M[i]]) )
    dE.append( (E[I][0] - E[i]).mean(0))

dE = np.vstack(dE)
E = E[pei_adata.obs['cell_type'] == 'HSC']
dE = dE[pei_adata.obs['cell_type'] == 'HSC']

gE, gdE = compute_velocity_on_grid(E,dE,density=2, adjust_for_stream=True,smooth=True)

fig,ax=createFig(figsize=(5, 5))
fig.set_size_inches(5,5)
palette = sc.pl._tools.scatterplots._get_palette(pei_adata, 'cell_type')
ax.scatter(
    pei_adata[list(map(lambda x: x != 'HSC', pei_adata.obs['cell_type']))].obsm["X_umap"][:,0],
    pei_adata[list(map(lambda x: x != 'HSC', pei_adata.obs['cell_type']))].obsm["X_umap"][:,1],
    s = 10, color='#E6DCD9', linewidth=0
)
sc.pl.umap(
    pei_adata[list(map(lambda x: x == 'HSC', pei_adata.obs['cell_type']))],
    ax=ax,
    color='prob',
    s=30,
    cmap=make_colormap(['#2ca02c','#F7F7F7','#1f77b4']),
    show=False
)
stream_kwargs = {
        "linewidth": 0.5,
        "density": 1,
        "zorder": 3,
        "color": "black",
        "arrowsize":  1,

        "arrowstyle": "-|>",
        "maxlength":  10,
        "integration_direction":  "both",
}

ax.streamplot(gE[0],gE[1],gdE[0],gdE[1],**stream_kwargs)
#plt.savefig("/home/zhengtuo/songtao/DestinyNet/pei_figure/flow.pdf")
plt.savefig("/home/zhengtuo/songtao/DestinyNet/rebuttal6.1/darlin_fate_flow.pdf")
100%|██████████████████████████████████████████████████████████████████| 4919/4919 [00:16<00:00, 304.39it/s]
../_images/notebooks_Darlin_100_1.png
[666]:
data.shape
[666]:
(1028,)
[684]:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.colors import ListedColormap
from colour import Color
from matplotlib.colors import LinearSegmentedColormap
def make_colormap( colors, show_palette = False ):
    color_ramp = LinearSegmentedColormap.from_list( 'my_list', [ Color( c1 ).rgb for c1 in colors ] )
    if show_palette:
        plt.figure( figsize = (15,3))
        plt.imshow( [list(np.arange(0, len( colors ) , 0.1)) ] , interpolation='nearest', origin='lower', cmap= color_ramp )
        plt.xticks([])
        plt.yticks([])
    return color_ramp
pei_adata=adata3.copy()
colormap=make_colormap(['#1f77b4','#F7F7F7','#2ca02c'])
# 获取数据
data = adata3[adata3.obs['cell_type']=='HSC'].obs['prob']

# 创建一个新的figure
plt.figure(figsize=(8, 6))

# 获取直方图的数据
n, bins, patches = plt.hist(data, bins=30, edgecolor='black', alpha=0.7)

# Normalize data to 0-1 range for colormap
norm = plt.Normalize(data.min(), data.max())

# 设置每个bin的颜色
for thisbin, thispatch in zip(bins, patches):
    color = colormap(norm(thisbin))
    thispatch.set_facecolor(color)

# 添加标题和轴标签
plt.title("Distribution of fate_pred_prob")
plt.xlabel("fate_pred_prob")
plt.ylabel("Number of Cells")
plt.savefig("/home/zhengtuo/songtao/DestinyNet/rebuttal6.1/darlin_fate_hisgram_pei.pdf", bbox_inches='tight')

# 显示图形
plt.show()

../_images/notebooks_Darlin_102_0.png
[ ]:
GT_colors= {
    'HSC': '#2ca02c',   # 蓝色
    'mkp bias': '#1f77b4',   #
    'Non-MkP': '#ff7f0e'  # 绿色橙色
}
[686]:
import scanpy as sc
import matplotlib.pyplot as plt
import pandas as pd

# 筛选数据
adata2 = adata3[(adata3.obs['fate_pred'].isin(['mkp_bias', 'no mkp_bias']))].copy()
#adata2 = adata2[adata2.obs['cell_source'] == 'H9']

# 绘制UMAP
plt.rcParams['font.size'] = 14
fig, ax = plt.subplots(figsize=(3, 3))
sc.pl.umap(adata2, color=['fate_pred'], ax=ax, s=10)

# 计算差异表达基因
sc.tl.rank_genes_groups(adata2, groupby='fate_pred', method='t-test_overestim_var',use_raw=False)

min_percentage_threshold = 0.6

# 选择感兴趣的基因
selected_genes = pd.DataFrame(adata2.uns['rank_genes_groups']['names']).to_numpy().flatten()

# 计算每个基因的表达百分比,并在找到16个符合条件的基因后停止
filtered_genes = []
for gene in selected_genes:
    if gene in adata2.var_names:
        gene_expression = adata2[:, gene].X
        expressed_cells_percentage = (gene_expression > 0).sum() / gene_expression.shape[0]
        if expressed_cells_percentage >= min_percentage_threshold:
            filtered_genes.append(gene)
        if len(filtered_genes) >= 16:
            break

# 如果过滤后的基因不足16个,从其他基因中补充
if len(filtered_genes) < 16:
    additional_genes = (gene for gene in adata2.var_names if gene not in filtered_genes)
    filtered_genes.extend(islice(additional_genes, 16 - len(filtered_genes)))

# 使用过滤后的基因绘制 dotplot
sc.pl.dotplot(
    adata2,
    var_names=filtered_genes,
    groupby='fate_pred',
    figsize=(10, 6),
    standard_scale='var',
)

/home/zhengtuo/miniconda3/envs/jst/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(
../_images/notebooks_Darlin_104_1.png
/home/zhengtuo/miniconda3/envs/jst/lib/python3.9/site-packages/scanpy/plotting/_dotplot.py:749: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
  dot_ax.scatter(x, y, **kwds)
../_images/notebooks_Darlin_104_3.png
[721]:
pei_adata=adata3.copy()
indices = (np.array(pei_adata.obs['cell_type'] == 'HSC'))
sc.tl.leiden(pei_adata, resolution=0.5)
pei_adata.obs['fate_pred'].cat.add_categories(['-'], inplace=True)

pei_adata.obs['fate_pred'] = pei_adata.obs['fate_pred'].fillna('-')

/tmp/ipykernel_67646/551174476.py:4: FutureWarning: The `inplace` parameter in pandas.Categorical.add_categories is deprecated and will be removed in a future version. Removing unused categories will always return a new Categorical object.
  pei_adata.obs['fate_pred'].cat.add_categories(['-'], inplace=True)
[722]:
sc.pl.umap(pei_adata,color='leiden')

/home/zhengtuo/miniconda3/envs/jst/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(
../_images/notebooks_Darlin_106_1.png
[720]:
import seaborn as sns

xkey = 'geneEnc'
mkey = 'genemap'
X = pei_adata.obsm['geneEnc'][:,0,:]
M = pei_adata.obsm['genemap'][:,0,:]
N = pei_adata.obsp['connectivities']
E = pei_adata.obsm["X_umap"]



# gE, gdE = biddy_adata.uns['gE'], biddy_adata.uns['gdE']
# gE, gdE = compute_velocity_on_grid(E,dE,density=2, adjust_for_stream=True,smooth=True)
fig,ax=createFig()
fig.set_size_inches(6, 5)
palette={
 'Ery': '#2A9E27',
 'Lymphoid': '#E43E96',
 'Mast': '#B5BD61',
 'My': '#279E9E',
 'Mo': '#50C3C6',
 'Neu': '#FAA339',
 'ME': '#279E8F',
 'Ly': '#E43E96',
 'HSC': '#E6DCD9',
    '-':'#E6DCD9'
}
ax.scatter(E[:,0],E[:,1],s=1, linewidths=0)
# ax.scatter(E[:,0],E[:,1],s=1, linewidths=0,c=list(map(lambda x: sc.pl._tools.scatterplots._get_palette(biddy_adata, 'leiden')[x], biddy_adata.obs['leiden'])))
tmp = pei_adata.obs

def NormalizeData(data):
    return (data - np.min(data)) / (np.max(data) - np.min(data))
# sc.pl.umap(weinreb_adata, ax=ax, color='Gata2')

i=np.random.choice(list(map(lambda z: z[0],
                            filter(lambda x: x[1] == '1',
                                   enumerate(pei_adata.obs['leiden'])))), 1)
i
from sklearn.neighbors import NearestNeighbors
nn = NearestNeighbors(n_neighbors=5)
nn.fit(X)
D,I = nn.kneighbors(M[i])
e = E[I][0]


E = pei_adata.obsm["X_umap"]

ax.scatter(
    pei_adata.obsm["X_umap"][:,0],
    pei_adata.obsm["X_umap"][:,1],
    color='#F1F1F1',
    s=2
)

stream_kwargs = {
        "linewidth": 0.5,
        "density": 1.8,
        "zorder": 3,
        "color": "black",
        "arrowsize":  1,
        "arrowstyle": "-|>",
        "maxlength":  10,
        "integration_direction":  "both",
}
ax.streamplot(gE[0],gE[1],gdE[0],gdE[1],**stream_kwargs)

ax.scatter(E[i,0],E[i,1],color='red',s=20,marker='*')
# sns.kdeplot(e[:,0],e[:,1],ax=ax, fill=True, alpha=0.5, cmap='Reds',cbar=True)

sns.kdeplot(x=e[:,0], y=e[:,1], ax=ax, fill=True, alpha=0.5, cmap='Reds', cbar=True)

[720]:
<AxesSubplot: >
../_images/notebooks_Darlin_107_1.png
[859]:
from matplotlib.backends.backend_pdf import PdfPages
with PdfPages('/home/zhengtuo/songtao/DestinyNet/rebuttal6.1/darlin_fateflow_hotpot4.pdf') as pdf:
    pdf.savefig(fig, bbox_inches='tight')
[861]:

# filtered_genes+=DGE_genes # sc.pl.dotplot( # adata2, # var_names=filtered_genes, # groupby='fate_pred', # figsize=(10, 6), # standard_scale='var', # )
[687]:
import scanpy as sc
from sklearn.preprocessing import MinMaxScaler
import numpy as np

# 假设 adata3 已经加载到你的环境中

# 选择特定的细胞群体
adata2 = adata3[(adata3.obs['fate_pred'].isin(['mkp_bias', 'no mkp_bias']))].copy()

# 选择展示的基因列表
DGE_genes = ['Hlf', 'Mecom', 'Mpl', 'Mllt3', 'Tbxas1', 'Ifitm1']

# 对数据进行 Z-score 标准化
sc.pp.scale(adata2)

# 对选择的基因表达进行 Min-Max 标准化
scaler = MinMaxScaler()
for gene in DGE_genes:
    if gene in adata2.var_names:
        gene_expression = adata2[:, gene].X
        gene_expression = gene_expression.toarray() if isinstance(gene_expression, np.matrix) else gene_expression  # Convert sparse matrix to dense
        scaled_gene_expression = scaler.fit_transform(gene_expression)
        adata2[:, gene].X = scaled_gene_expression

# 绘制标准化后基因的 UMAP 图
for gene in DGE_genes:
    sc.pl.umap(adata2, color=gene, s=50, title=f'UMAP of {gene} (scaled 0-1)')

# 如果只绘制某一个基因的 UMAP 图
# sc.pl.umap(adata2, color="Hlf", s=50, title='UMAP of Hlf (scaled 0-1)')

../_images/notebooks_Darlin_110_0.png
../_images/notebooks_Darlin_110_1.png
../_images/notebooks_Darlin_110_2.png
../_images/notebooks_Darlin_110_3.png
../_images/notebooks_Darlin_110_4.png
../_images/notebooks_Darlin_110_5.png
[688]:
import seaborn as sns
import pandas as pd
from torch.utils.data import DataLoader, TensorDataset
from torch.autograd import Variable
import torch.nn as nn

def compute_gradients_for_label(target_label, data, model_encoder, model_decoder):
    mask = data.obs['fate_pred'] == target_label
    filtered_data = data[mask]

    geneex_data = torch.tensor(filtered_data.X.toarray(), dtype=torch.float32).to(device2)

    dataset = TensorDataset(geneex_data)
    dataloader = DataLoader(dataset, batch_size=256, shuffle=False)

    total_gradients = torch.zeros(len_geneExp).to(device2)
    for batch in dataloader:
        geneExp = batch[0]
        geneExp = Variable(geneExp, requires_grad=True)
        output_data = model_encoder(geneExp)
        output_data = model_decoder(output_data)

        loss_function = nn.MSELoss()
        loss = loss_function(output_data, geneExp)

        model_encoder.zero_grad()
        model_decoder.zero_grad()
        loss.backward()

        total_gradients += geneExp.grad.abs().mean(dim=0)

    average_gradients = total_gradients / len(dataloader)

    return average_gradients

labels = ['mkp_bias', 'no mkp_bias']
gradients_per_label = {}

for label in labels:
    gradients_per_label[label] = compute_gradients_for_label(label, adata3, geneEnc, geneDec)

[689]:
num_key_genes = 100

key_genes_per_label = {}

for label, gradients in gradients_per_label.items():
    df = pd.DataFrame({
        'gene_ids': adata2.var.index.values,
        'importance': gradients.cpu().detach().numpy()
    })

    sorted_df = df.sort_values(by="importance", ascending=False)

    key_genes = sorted_df['gene_ids'].head(num_key_genes).tolist()
    key_genes_per_label[label] = key_genes

for label, genes in key_genes_per_label.items():
    print(f"Key genes for label {label}:")
    print(', '.join(genes))
    print("\n")

Key genes for label mkp_bias:
Malat1, Eef1a1, Fau, Tpt1, Ptma, Ppia, Rack1, Actb, Fth1, Gm42418, Angpt1, Eef1b2, Naca, Msi2, Npm1, Etv6, Ubb, Ifitm1, Tmsb4x, B2m, H3f3a, Gas5, Nrxn1, Aff3, H2-K1, Stmn1, H2afy, H3f3b, H2afz, Lrmda, Srgn, Ncl, Arhgap15, Adgrl4, Rnf220, Hmgb1, Zbtb20, Mecom, Ftl1, Pabpc1, Foxp1, Eif1, Runx1, Meis1, Ifitm3, Lmo2, Ctla2a, Nedd4, Dapp1, Eif5a, Pfn1, Fut8, Prkg1, Serbp1, Ybx1, Cdk6, Eef2, Anp32b, Elmo1, Cox8a, Actg1, Gpx1, Oaz1, H2-D1, Itm2b, Cox4i1, Ifitm2, Cmtm7, Atp5e, Myl6, Hnrnpa2b1, Ddx5, Akap13, Btf3, Gcnt2, Nme2, Hmgb2, Serf2, Rbm39, Atp5g2, Chchd2, Gapdh, Eif3f, Set, Ran, Eef1g, Mctp1, Rsrp1, Zfp36l2, Mbnl1, Ripor2, Atpif1, Uqcrh, Gng5, Snrpg, Cfl1, Ndufa4, Hnrnpu, Hint1, 2610307P16Rik


Key genes for label no mkp_bias:
Malat1, Eef1a1, Fau, Tpt1, Ptma, Rack1, Ppia, Gm42418, Angpt1, Actb, Etv6, Eef1b2, Msi2, Fth1, Naca, Nrxn1, Ifitm1, Npm1, Lrmda, Ubb, Gas5, Tmsb4x, Adgrl4, B2m, Arhgap15, Aff3, H3f3b, H2-K1, H3f3a, Zbtb20, H2afy, Rnf220, Mecom, Srgn, Foxp1, Ftl1, Meis1, Ctla2a, Ifitm3, Elmo1, Lmo2, Eef2, Dapp1, Eif1, Runx1, Akap13, Hmgb1, Gcnt2, Prkg1, Stmn1, Ncl, Nedd4, Pabpc1, Cdk6, 2610307P16Rik, H2-D1, Fut8, Itm2b, Btf3, Rbm39, Serbp1, Pfn1, Mbnl1, Ccnd3, Zfp36l2, Ddx5, H2afz, Mctp1, Cmtm7, Prtn3, Oaz1, Actg1, Ifitm2, Cox4i1, Ybx1, Maml3, Uqcrh, Cox8a, Eif5a, Rsrp1, Ptpn18, Serf2, Ripor2, Celf2, Txnip, Atp5g2, Gpx1, Eif3f, Foxn3, Hnrnpa2b1, Myl6, 2410006H16Rik, Ptpre, Iqgap2, Anp32b, Nme2, Serinc3, Arhgdib, Pde4d, Atp5e


[690]:
def normalize(array):
    min_val = array.min()
    max_val = array.max()
    return (array - min_val) / (max_val - min_val)

num_key_genes = 50

for label, gradients in gradients_per_label.items():
    df = pd.DataFrame({
        'gene_ids': adata2.var.index.values,
        'importance': gradients.cpu().detach().numpy()
    })

    sorted_df = df.sort_values(by="importance", ascending=False).head(num_key_genes)
    sorted_df['gene_importance'] = normalize(sorted_df['importance'])

    plt.figure(figsize=(12, 10))

    sns.barplot(x='gene_importance', y='gene_ids', data=sorted_df, palette='viridis')

    plt.title(f"Top {num_key_genes} Important Genes for label {label}")
    plt.xlabel('Gene Importance')
    plt.ylabel('Gene ID')
    plt.show()

../_images/notebooks_Darlin_113_0.png
../_images/notebooks_Darlin_113_1.png
[691]:
gradient_data = {}
for label, gradients in gradients_per_label.items():
    gradient_data[label] = gradients.cpu().detach().numpy()

df_gradients_all = pd.DataFrame(gradient_data, index=adata2.var.index.values)

def normalize(column):
    """Normalize a pandas column to range between 0 and 1"""
    min_val = column.min()
    max_val = column.max()
    return (column - min_val) / (max_val - min_val)

# Normalize gradients for each label
for label in labels:
    df_gradients_all[label] = normalize(df_gradients_all[label])

# Compute difference between normalized gradients for the two labels
df_gradients_all['diff'] = (df_gradients_all[labels[0]] - df_gradients_all[labels[1]])

# Select the top 20 genes with the largest gradient difference
top_genes = df_gradients_all['diff'].abs().nlargest(40).index

# Extract the normalized gradient values for the top genes
df_gradient_top = df_gradients_all.loc[top_genes, labels]

# Plot heatmap
plt.figure(figsize=(12, 8))
sns.heatmap(df_gradient_top, cmap="coolwarm", annot=True, cbar_kws={'label': 'Normalized Gradient Difference'})
plt.title("Normalized Gradient Difference of Top 20 Differentially Expressed Genes across Labels", fontsize=15)
plt.ylabel('Top Genes', fontsize=12)
plt.xlabel('Labels', fontsize=12)
plt.tight_layout()
plt.show()
../_images/notebooks_Darlin_114_0.png
[ ]:
'mkp_bias': '#1f77b4',   #
    'no mkp_bias': '#2ca02c'  # 绿色橙色
[698]:
import matplotlib.pyplot as plt
df_gradients_all_normalized = df_gradients_all.copy()
# for label in labels:
#     df_gradients_all_normalized[label] = (df_gradients_all[label] - df_gradients_all[label].min()) / (df_gradients_all[label].max() - df_gradients_all[label].min())

# 将第一个标签的值取为负数
df_gradients_all_normalized[labels[0]] = -df_gradients_all_normalized[labels[0]]

# 计算bias,即归一化后两个标签的梯度值之差
df_gradients_all_normalized['bias'] = df_gradients_all_normalized[labels[1]] + df_gradients_all_normalized[labels[0]]

# 从大到小排序,获取右侧的bias
sorted_df_right_bias = df_gradients_all_normalized.sort_values(by='bias', ascending=False).head(10)
sorted_df_right_bias =sorted_df_right_bias.sort_values(by='bias', ascending=True)

# 从小到大排序,获取左侧的bias
sorted_df_left_bias = df_gradients_all_normalized.sort_values(by='bias').head(10)
sorted_df_left_bias =sorted_df_left_bias.sort_values(by='bias', ascending=False)
# 合并两个排序的数据框,然后重新排序
top_genes_bias = pd.concat([sorted_df_left_bias, sorted_df_right_bias])
#top_genes_bias = top_genes_bias.sort_values(by='bias', key=abs, ascending=False)

fig, ax = plt.subplots(figsize=(12, 10))

# 为每个标签定义颜色
colors = ['#1f77b4', '#2ca02c']

# 为每个基因和每个标签画点和线
for gene in top_genes_bias.index:
    for i, label in enumerate(labels):
        x_value = top_genes_bias.loc[gene, label]
        y_value = list(top_genes_bias.index).index(gene)
        ax.plot(x_value, y_value, marker='o', color=colors[i])
        if i == 0:
            x_values = [top_genes_bias.loc[gene, labels[0]], top_genes_bias.loc[gene, labels[1]]]
            y_values = [y_value, y_value]
            ax.plot(x_values, y_values, linestyle='-', color='grey')

# 画0处的虚线
ax.axvline(0, color='black', linestyle='--')

# 在bias切换方向的地方添加虚线
divider_position = 10
ax.axhline(divider_position - 0.5, color='green', linestyle='--')

ax.set_yticks(range(len(top_genes_bias.index)))
ax.set_yticklabels(top_genes_bias.index)
ax.set_title('Top 20 Genes with Highest Gradient Bias')
ax.set_xlabel('Normalized Gradient Value')
ax.legend(labels, loc='upper right')

plt.tight_layout()
plt.show()
fig.savefig("/home/zhengtuo/songtao/DestinyNet/rebuttal6.1/darlin_lolipop_plot_top50.pdf", dpi=1000)
../_images/notebooks_Darlin_116_0.png
[730]:
# # 计算两个label之间的差异
# for label in labels:
#     mask = adata.obs['fate_pred'] == label
#     avg_expression = adata[mask].X.mean(axis=0).tolist()
#     expression_data[label] = avg_expression

# # 转为DataFrame
# df_expression_all = pd.DataFrame(expression_data, index=adata.var.index.values)

import anndata
# df_expression_all['diff'] = df_expression_all[labels[0]] - df_expression_all[labels[1]]
#adata_raw = anndata.AnnData(X=adata2.raw.X, var=adata2.raw.var, obs=adata2.obs)
top_genes =  top_genes_bias[::-1].index
top_genes = top_genes.difference(['Prtn3'])
adata3_copy=adata3.copy()
sc.pp.scale(adata3_copy)
sc.pl.dotplot(adata3_copy,
              var_names=top_genes,
              groupby='fate_pred',
              figsize=(8.5, 3),
              dot_max=0.4,
              color_map='Reds',
              title="Top Differentially Expressed Genes across Cell Fate",
              #vmin=0.1,
              #vmax=1,
              save='darlin_dotplot_fate2.pdf')

WARNING: saving figure to file figures/dotplot_darlin_dotplot_fate2.pdf
/home/zhengtuo/miniconda3/envs/jst/lib/python3.9/site-packages/scanpy/plotting/_dotplot.py:749: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
  dot_ax.scatter(x, y, **kwds)
../_images/notebooks_Darlin_117_2.png
[ ]:

[ ]:

[ ]: