this script is to illustrate the consistency between starTracer and FilterMarker

1 PFC Data

1.1FilterMarker

the result after filterMarker, we choose the top 5 marker genes for each cluster:

out.PFC.filt <- starTracer::filterMarker(x = snRNA.hpfc.sct,
                                           ident.use = "major_clust",
                                           out.brain.Seurat,
                                           num = 5)
## getting dgCMatrix from object
## calculating gathering result
## calculating expression info
## We found 5 marker genes for cluster: Astro,Oligo,LAMP5_NOS1,OPC,L2-3_CUX2,L5-6_TLE4,Micro,L5-6_THEMIS,SST,PV_SCUBE3,VIP,ID2,PV,L4_RORB,Vas
## starTracer found 5 marker genes in cluster: Astro,Oligo,LAMP5_NOS1,OPC,L2-3_CUX2,L5-6_TLE4,Micro,L5-6_THEMIS,SST,PV_SCUBE3,VIP,ID2,PV,L4_RORB,Vas, for clusters PN_dev starTracer found 0 marker genes.

frame.filt <- out.PFC.filt$marker.frame
frame.filt <- select(frame.filt,cluster,gene, avg_log2FC, pct.pos)
frame.filt
## # A tibble: 75 × 4
##    cluster gene    avg_log2FC pct.pos
##    <fct>   <chr>        <dbl>   <dbl>
##  1 Astro   Emx2         1.41    0.902
##  2 Astro   Papln        1.77    0.899
##  3 Astro   Slc14a1      1.71    0.882
##  4 Astro   Nfatc4       0.823   0.882
##  5 Astro   Pygm         0.548   0.882
##  6 Oligo   Klk6         0.925   0.964
##  7 Oligo   Prima1       1.22    0.953
##  8 Oligo   Ermn         1.44    0.946
##  9 Oligo   Nkx6-2       1.25    0.946
## 10 Oligo   Gjb1         0.640   0.945
## # … with 65 more rows

1.2Seurat

we include genes with p.adjusted value less than 0.05. Marker genes are then arranged by avg_foldchange: note that we first include top 20 genes in each cluster as these genes may be shared by multiple clusters. To insure yielding 5 marker genes in each cluster after removing such duplication, we include 20 genes.

#including the top 20 genes of Seurat's result
gene.seurat <- subset(out.brain.Seurat,out.brain.Seurat$p_val_adj < 0.05)
gene.seurat <- arrange(gene.seurat,cluster,desc(avg_log2FC))
gene.seurat <- with(gene.seurat,by(gene.seurat[,"gene"],INDICES = cluster,FUN = function(x)head(x,20))) %>% unlist() %>% as.vector() %>% rev()

#generating the matrix of the marker genes
frame.seurat <- out.brain.Seurat[,c("cluster","gene","avg_log2FC")] %>% subset(out.brain.Seurat$gene %in% gene.seurat)
frame.seurat <- arrange(frame.seurat,gene,desc(avg_log2FC))
frame.seurat <- frame.seurat[!duplicated(frame.seurat$gene),] %>% arrange(cluster,desc(avg_log2FC))

#finding the top 5 marker genes in each cluster
frame.seurat <- frame.seurat %>%
  group_by(cluster) %>% #按照cluster列进行分组
  top_n(5, wt = avg_log2FC) %>% #按照FC列从大到小排序,选择每组前5行
  ungroup()
frame.seurat$cluster %>% table()
## .
##       Astro       Oligo  LAMP5_NOS1         OPC   L2-3_CUX2   L5-6_TLE4 
##           5           5           5           5           5           5 
##       Micro L5-6_THEMIS         SST   PV_SCUBE3         VIP         ID2 
##           5           5           5           5           5           5 
##          PV     L4_RORB         Vas      PN_dev 
##           5           5           5           0
#calculating the specificity level of Seurat
frame.seurat <- cal_spe(snRNA.hpfc.sct,ident.use = "major_clust",as.data.frame(frame.seurat))
frame.seurat
##        cluster     gene avg_log2FC     pct.pos
## 1        Astro   Slc1a2   5.480720 0.284341729
## 2        Astro     Gpc5   4.957582 0.377946418
## 3        Astro    Hpse2   4.177952 0.529110936
## 4        Astro   Slc4a4   3.922193 0.383148448
## 5        Astro   Slc1a3   3.732490 0.265336290
## 6        Oligo     St18   4.759271 0.772773240
## 7        Oligo     Plp1   4.597798 0.752325759
## 8        Oligo   Rnf220   4.575124 0.661486352
## 9        Oligo   Ctnna3   4.233466 0.602961620
## 10       Oligo      Mbp   4.176547 0.602069216
## 11  LAMP5_NOS1    Fgf13   4.080893 0.025596529
## 12  LAMP5_NOS1    Unc5d   3.248616 0.011481950
## 13  LAMP5_NOS1    Ptprt   3.077178 0.013079933
## 14  LAMP5_NOS1    Pdgfd   3.056775 0.047288503
## 15  LAMP5_NOS1    Myo16   3.030795 0.023892894
## 16         OPC   Lhfpl3   5.040760 0.253117020
## 17         OPC      Tnr   4.227935 0.161946818
## 18         OPC   Pcdh15   4.112117 0.228098725
## 19         OPC     Vcan   3.766387 0.477781092
## 20         OPC   Ptprz1   3.631523 0.203212460
## 21   L2-3_CUX2    Cbln2   3.039206 0.574445240
## 22   L2-3_CUX2     Cux2   2.801899 0.510708144
## 23   L2-3_CUX2   Kcnip4   2.716387 0.168966389
## 24   L2-3_CUX2     Ldb2   2.703965 0.332365311
## 25   L2-3_CUX2   Hs6st3   2.657526 0.294060542
## 26   L5-6_TLE4   Hs3st4   3.511130 0.060675676
## 27   L5-6_TLE4    Asic2   3.423103 0.036764706
## 28   L5-6_TLE4    Htr2c   2.137621 0.097633136
## 29   L5-6_TLE4    Olfm3   2.088341 0.061197917
## 30   L5-6_TLE4     Ryr2   2.085110 0.032796661
## 31       Micro  Apbb1ip   4.420732 0.551773050
## 32       Micro  St6gal1   4.359377 0.204894515
## 33       Micro    Dock8   4.353930 0.597151577
## 34       Micro   P2ry12   3.905445 0.528429838
## 35       Micro   Inpp5d   3.849866 0.504420661
## 36 L5-6_THEMIS   Clstn2   2.900992 0.045155092
## 37 L5-6_THEMIS     Nrg1   2.601541 0.041488949
## 38 L5-6_THEMIS  Zfp804b   2.481906 0.046849758
## 39 L5-6_THEMIS  Galnt14   2.215353 0.071462945
## 40 L5-6_THEMIS     Erc2   2.174543 0.024935577
## 41         SST    Grik1   4.494872 0.118820158
## 42         SST    Robo2   3.113389 0.066098222
## 43         SST     Etl4   3.077490 0.096439313
## 44         SST     Sgcz   3.064211 0.072071191
## 45         SST    Nxph1   2.901057 0.167428475
## 46   PV_SCUBE3    Cntn5   4.545029 0.013959649
## 47   PV_SCUBE3    Dpp10   3.397631 0.008140709
## 48   PV_SCUBE3      Alk   3.317869 0.023780594
## 49   PV_SCUBE3     Sdk1   3.094766 0.015495985
## 50   PV_SCUBE3     Car8   2.884532 0.044618529
## 51         VIP    Synpr   3.043661 0.154689642
## 52         VIP  Galntl6   3.006653 0.085560935
## 53         VIP  Slc24a3   2.342765 0.111785973
## 54         VIP    Rgs12   2.315500 0.082802548
## 55         VIP    Vwc2l   2.228861 0.234921817
## 56         ID2     Reln   4.344527 0.174474960
## 57         ID2   Adarb2   3.899625 0.054311840
## 58         ID2   Inpp4b   3.085062 0.074331246
## 59         ID2    Fstl5   3.036329 0.108388772
## 60         ID2   Cxcl14   2.931708 0.232990506
## 61          PV   Btbd11   3.111933 0.161655255
## 62          PV  Zfp804a   2.906055 0.072614388
## 63          PV    Kcnc2   2.803040 0.064637443
## 64          PV    Erbb4   2.715471 0.027371726
## 65          PV  Cntnap2   2.444995 0.030250461
## 66     L4_RORB Il1rapl2   3.175775 0.188160089
## 67     L4_RORB    Tshz2   2.966674 0.162399793
## 68     L4_RORB   Pou6f2   2.471191 0.129877267
## 69     L4_RORB    Fstl4   2.465454 0.103232382
## 70     L4_RORB    Cpne4   2.451890 0.122004357
## 71         Vas     Flt1   4.278392 0.041095890
## 72         Vas     Ebf1   4.173325 0.242774566
## 73         Vas  Colec12   3.987983 0.024793388
## 74         Vas   Cobll1   3.828993 0.034664657
## 75         Vas   Atp10a   3.714229 0.041292639

Specificity level difference

first, we will generate the matrix of starTracer

frame.pct.pos <- data.frame(cluster = frame.filt$cluster,pct.filt = frame.filt$pct.pos,pct.seurat = frame.seurat$pct.pos)
frame.pct.pos
##        cluster pct.filt  pct.seurat
## 1        Astro    0.902 0.284341729
## 2        Astro    0.899 0.377946418
## 3        Astro    0.882 0.529110936
## 4        Astro    0.882 0.383148448
## 5        Astro    0.882 0.265336290
## 6        Oligo    0.964 0.772773240
## 7        Oligo    0.953 0.752325759
## 8        Oligo    0.946 0.661486352
## 9        Oligo    0.946 0.602961620
## 10       Oligo    0.945 0.602069216
## 11  LAMP5_NOS1    0.149 0.025596529
## 12  LAMP5_NOS1    0.135 0.011481950
## 13  LAMP5_NOS1    0.103 0.013079933
## 14  LAMP5_NOS1    0.093 0.047288503
## 15  LAMP5_NOS1    0.079 0.023892894
## 16         OPC    0.776 0.253117020
## 17         OPC    0.772 0.161946818
## 18         OPC    0.772 0.228098725
## 19         OPC    0.756 0.477781092
## 20         OPC    0.708 0.203212460
## 21   L2-3_CUX2    0.864 0.574445240
## 22   L2-3_CUX2    0.814 0.510708144
## 23   L2-3_CUX2    0.801 0.168966389
## 24   L2-3_CUX2    0.769 0.332365311
## 25   L2-3_CUX2    0.762 0.294060542
## 26   L5-6_TLE4    0.248 0.060675676
## 27   L5-6_TLE4    0.240 0.036764706
## 28   L5-6_TLE4    0.178 0.097633136
## 29   L5-6_TLE4    0.168 0.061197917
## 30   L5-6_TLE4    0.167 0.032796661
## 31       Micro    0.862 0.551773050
## 32       Micro    0.855 0.204894515
## 33       Micro    0.851 0.597151577
## 34       Micro    0.846 0.528429838
## 35       Micro    0.839 0.504420661
## 36 L5-6_THEMIS    0.268 0.045155092
## 37 L5-6_THEMIS    0.208 0.041488949
## 38 L5-6_THEMIS    0.165 0.046849758
## 39 L5-6_THEMIS    0.151 0.071462945
## 40 L5-6_THEMIS    0.114 0.024935577
## 41         SST    0.634 0.118820158
## 42         SST    0.575 0.066098222
## 43         SST    0.554 0.096439313
## 44         SST    0.466 0.072071191
## 45         SST    0.407 0.167428475
## 46   PV_SCUBE3    0.351 0.013959649
## 47   PV_SCUBE3    0.252 0.008140709
## 48   PV_SCUBE3    0.202 0.023780594
## 49   PV_SCUBE3    0.192 0.015495985
## 50   PV_SCUBE3    0.145 0.044618529
## 51         VIP    0.783 0.154689642
## 52         VIP    0.717 0.085560935
## 53         VIP    0.667 0.111785973
## 54         VIP    0.606 0.082802548
## 55         VIP    0.564 0.234921817
## 56         ID2    0.831 0.174474960
## 57         ID2    0.329 0.054311840
## 58         ID2    0.316 0.074331246
## 59         ID2    0.298 0.108388772
## 60         ID2    0.252 0.232990506
## 61          PV    0.576 0.161655255
## 62          PV    0.385 0.072614388
## 63          PV    0.372 0.064637443
## 64          PV    0.270 0.027371726
## 65          PV    0.239 0.030250461
## 66     L4_RORB    0.430 0.188160089
## 67     L4_RORB    0.419 0.162399793
## 68     L4_RORB    0.332 0.129877267
## 69     L4_RORB    0.310 0.103232382
## 70     L4_RORB    0.294 0.122004357
## 71         Vas    0.909 0.041095890
## 72         Vas    0.684 0.242774566
## 73         Vas    0.600 0.024793388
## 74         Vas    0.538 0.034664657
## 75         Vas    0.533 0.041292639

for each cluster, we calculate the mean levels of specificity. Then we can calculate the adjusted pct.pos from before and after filterMarker

#cal seurat.mean
frame.pct.pos$pct.seurat.mean <- rep(aggregate(pct.seurat ~ cluster, data = frame.pct.pos, FUN = mean)$pct.seurat,each = 5)
#divide seurat.mean to get star.adk and seurat.adj
frame.pct.pos$pct.seurat.adj <- frame.pct.pos$pct.seurat/frame.pct.pos$pct.seurat.mean
frame.pct.pos$pct.filt.adj <- frame.pct.pos$pct.filt/frame.pct.pos$pct.seurat.mean
frame.pct.pos
##        cluster pct.filt  pct.seurat pct.seurat.mean pct.seurat.adj pct.filt.adj
## 1        Astro    0.902 0.284341729      0.36797676      0.7727165     2.451242
## 2        Astro    0.899 0.377946418      0.36797676      1.0270932     2.443089
## 3        Astro    0.882 0.529110936      0.36797676      1.4378922     2.396890
## 4        Astro    0.882 0.383148448      0.36797676      1.0412300     2.396890
## 5        Astro    0.882 0.265336290      0.36797676      0.7210681     2.396890
## 6        Oligo    0.964 0.772773240      0.67832324      1.1392404     1.421151
## 7        Oligo    0.953 0.752325759      0.67832324      1.1090963     1.404935
## 8        Oligo    0.946 0.661486352      0.67832324      0.9751787     1.394615
## 9        Oligo    0.946 0.602961620      0.67832324      0.8889001     1.394615
## 10       Oligo    0.945 0.602069216      0.67832324      0.8875845     1.393141
## 11  LAMP5_NOS1    0.149 0.025596529      0.02426796      1.0547457     6.139782
## 12  LAMP5_NOS1    0.135 0.011481950      0.02426796      0.4731320     5.562890
## 13  LAMP5_NOS1    0.103 0.013079933      0.02426796      0.5389795     4.244279
## 14  LAMP5_NOS1    0.093 0.047288503      0.02426796      1.9485981     3.832213
## 15  LAMP5_NOS1    0.079 0.023892894      0.02426796      0.9845447     3.255321
## 16         OPC    0.776 0.253117020      0.26483122      0.9557673     2.930168
## 17         OPC    0.772 0.161946818      0.26483122      0.6115095     2.915064
## 18         OPC    0.772 0.228098725      0.26483122      0.8612985     2.915064
## 19         OPC    0.756 0.477781092      0.26483122      1.8040965     2.854648
## 20         OPC    0.708 0.203212460      0.26483122      0.7673282     2.673401
## 21   L2-3_CUX2    0.864 0.574445240      0.37610913      1.5273366     2.297206
## 22   L2-3_CUX2    0.814 0.510708144      0.37610913      1.3578723     2.164265
## 23   L2-3_CUX2    0.801 0.168966389      0.37610913      0.4492483     2.129701
## 24   L2-3_CUX2    0.769 0.332365311      0.37610913      0.8836938     2.044619
## 25   L2-3_CUX2    0.762 0.294060542      0.37610913      0.7818490     2.026008
## 26   L5-6_TLE4    0.248 0.060675676      0.05781362      1.0495049     4.289647
## 27   L5-6_TLE4    0.240 0.036764706      0.05781362      0.6359177     4.151271
## 28   L5-6_TLE4    0.178 0.097633136      0.05781362      1.6887567     3.078859
## 29   L5-6_TLE4    0.168 0.061197917      0.05781362      1.0585381     2.905890
## 30   L5-6_TLE4    0.167 0.032796661      0.05781362      0.5672826     2.888593
## 31       Micro    0.862 0.551773050      0.47733393      1.1559477     1.805864
## 32       Micro    0.855 0.204894515      0.47733393      0.4292478     1.791199
## 33       Micro    0.851 0.597151577      0.47733393      1.2510143     1.782819
## 34       Micro    0.846 0.528429838      0.47733393      1.1070444     1.772344
## 35       Micro    0.839 0.504420661      0.47733393      1.0567459     1.757679
## 36 L5-6_THEMIS    0.268 0.045155092      0.04597846      0.9820922     5.828816
## 37 L5-6_THEMIS    0.208 0.041488949      0.04597846      0.9023561     4.523857
## 38 L5-6_THEMIS    0.165 0.046849758      0.04597846      1.0189500     3.588637
## 39 L5-6_THEMIS    0.151 0.071462945      0.04597846      1.5542699     3.284146
## 40 L5-6_THEMIS    0.114 0.024935577      0.04597846      0.5423317     2.479422
## 41         SST    0.634 0.118820158      0.10417147      1.1406209     6.086119
## 42         SST    0.575 0.066098222      0.10417147      0.6345137     5.519745
## 43         SST    0.554 0.096439313      0.10417147      0.9257747     5.318155
## 44         SST    0.466 0.072071191      0.10417147      0.6918515     4.473394
## 45         SST    0.407 0.167428475      0.10417147      1.6072392     3.907020
## 46   PV_SCUBE3    0.351 0.013959649      0.02119909      0.6585022    16.557312
## 47   PV_SCUBE3    0.252 0.008140709      0.02119909      0.3840122    11.887301
## 48   PV_SCUBE3    0.202 0.023780594      0.02119909      1.1217741     9.528709
## 49   PV_SCUBE3    0.192 0.015495985      0.02119909      0.7309739     9.056991
## 50   PV_SCUBE3    0.145 0.044618529      0.02119909      2.1047376     6.839915
## 51         VIP    0.783 0.154689642      0.13395218      1.1548124     5.845369
## 52         VIP    0.717 0.085560935      0.13395218      0.6387424     5.352656
## 53         VIP    0.667 0.111785973      0.13395218      0.8345215     4.979389
## 54         VIP    0.606 0.082802548      0.13395218      0.6181500     4.524002
## 55         VIP    0.564 0.234921817      0.13395218      1.7537737     4.210458
## 56         ID2    0.831 0.174474960      0.12889946      1.3535740     6.446885
## 57         ID2    0.329 0.054311840      0.12889946      0.4213504     2.552377
## 58         ID2    0.316 0.074331246      0.12889946      0.5766606     2.451523
## 59         ID2    0.298 0.108388772      0.12889946      0.8408784     2.311879
## 60         ID2    0.252 0.232990506      0.12889946      1.8075366     1.955012
## 61          PV    0.576 0.161655255      0.07130585      2.2670685     8.077878
## 62          PV    0.385 0.072614388      0.07130585      1.0183510     5.399276
## 63          PV    0.372 0.064637443      0.07130585      0.9064816     5.216963
## 64          PV    0.270 0.027371726      0.07130585      0.3838637     3.786505
## 65          PV    0.239 0.030250461      0.07130585      0.4242353     3.351758
## 66     L4_RORB    0.430 0.188160089      0.14113478      1.3331944     3.046733
## 67     L4_RORB    0.419 0.162399793      0.14113478      1.1506717     2.968793
## 68     L4_RORB    0.332 0.129877267      0.14113478      0.9202357     2.352361
## 69     L4_RORB    0.310 0.103232382      0.14113478      0.7314454     2.196482
## 70     L4_RORB    0.294 0.122004357      0.14113478      0.8644528     2.083115
## 71         Vas    0.909 0.041095890      0.07692423      0.5342386    11.816823
## 72         Vas    0.684 0.242774566      0.07692423      3.1560221     8.891867
## 73         Vas    0.600 0.024793388      0.07692423      0.3223092     7.799883
## 74         Vas    0.538 0.034664657      0.07692423      0.4506338     6.993895
## 75         Vas    0.533 0.041292639      0.07692423      0.5367963     6.928896

arrange the values by the mean of the values after filterMarker for plotting

#cal star.adj.mean to arrange the plot
frame.pct.pos$pct.filt.adj.mean <- rep(aggregate(pct.filt.adj ~ cluster, data = frame.pct.pos, FUN = mean)$pct.filt.adj,each = 5)
#arrange
frame.pct.pos <- arrange(frame.pct.pos,desc(pct.filt.adj.mean))
frame.pct.pos$cluster <- factor(frame.pct.pos$cluster,levels = unique(frame.pct.pos$cluster))
frame.pct.pos
##        cluster pct.filt  pct.seurat pct.seurat.mean pct.seurat.adj pct.filt.adj
## 1    PV_SCUBE3    0.351 0.013959649      0.02119909      0.6585022    16.557312
## 2    PV_SCUBE3    0.252 0.008140709      0.02119909      0.3840122    11.887301
## 3    PV_SCUBE3    0.202 0.023780594      0.02119909      1.1217741     9.528709
## 4    PV_SCUBE3    0.192 0.015495985      0.02119909      0.7309739     9.056991
## 5    PV_SCUBE3    0.145 0.044618529      0.02119909      2.1047376     6.839915
## 6          Vas    0.909 0.041095890      0.07692423      0.5342386    11.816823
## 7          Vas    0.684 0.242774566      0.07692423      3.1560221     8.891867
## 8          Vas    0.600 0.024793388      0.07692423      0.3223092     7.799883
## 9          Vas    0.538 0.034664657      0.07692423      0.4506338     6.993895
## 10         Vas    0.533 0.041292639      0.07692423      0.5367963     6.928896
## 11          PV    0.576 0.161655255      0.07130585      2.2670685     8.077878
## 12          PV    0.385 0.072614388      0.07130585      1.0183510     5.399276
## 13          PV    0.372 0.064637443      0.07130585      0.9064816     5.216963
## 14          PV    0.270 0.027371726      0.07130585      0.3838637     3.786505
## 15          PV    0.239 0.030250461      0.07130585      0.4242353     3.351758
## 16         SST    0.634 0.118820158      0.10417147      1.1406209     6.086119
## 17         SST    0.575 0.066098222      0.10417147      0.6345137     5.519745
## 18         SST    0.554 0.096439313      0.10417147      0.9257747     5.318155
## 19         SST    0.466 0.072071191      0.10417147      0.6918515     4.473394
## 20         SST    0.407 0.167428475      0.10417147      1.6072392     3.907020
## 21         VIP    0.783 0.154689642      0.13395218      1.1548124     5.845369
## 22         VIP    0.717 0.085560935      0.13395218      0.6387424     5.352656
## 23         VIP    0.667 0.111785973      0.13395218      0.8345215     4.979389
## 24         VIP    0.606 0.082802548      0.13395218      0.6181500     4.524002
## 25         VIP    0.564 0.234921817      0.13395218      1.7537737     4.210458
## 26  LAMP5_NOS1    0.149 0.025596529      0.02426796      1.0547457     6.139782
## 27  LAMP5_NOS1    0.135 0.011481950      0.02426796      0.4731320     5.562890
## 28  LAMP5_NOS1    0.103 0.013079933      0.02426796      0.5389795     4.244279
## 29  LAMP5_NOS1    0.093 0.047288503      0.02426796      1.9485981     3.832213
## 30  LAMP5_NOS1    0.079 0.023892894      0.02426796      0.9845447     3.255321
## 31 L5-6_THEMIS    0.268 0.045155092      0.04597846      0.9820922     5.828816
## 32 L5-6_THEMIS    0.208 0.041488949      0.04597846      0.9023561     4.523857
## 33 L5-6_THEMIS    0.165 0.046849758      0.04597846      1.0189500     3.588637
## 34 L5-6_THEMIS    0.151 0.071462945      0.04597846      1.5542699     3.284146
## 35 L5-6_THEMIS    0.114 0.024935577      0.04597846      0.5423317     2.479422
## 36   L5-6_TLE4    0.248 0.060675676      0.05781362      1.0495049     4.289647
## 37   L5-6_TLE4    0.240 0.036764706      0.05781362      0.6359177     4.151271
## 38   L5-6_TLE4    0.178 0.097633136      0.05781362      1.6887567     3.078859
## 39   L5-6_TLE4    0.168 0.061197917      0.05781362      1.0585381     2.905890
## 40   L5-6_TLE4    0.167 0.032796661      0.05781362      0.5672826     2.888593
## 41         ID2    0.831 0.174474960      0.12889946      1.3535740     6.446885
## 42         ID2    0.329 0.054311840      0.12889946      0.4213504     2.552377
## 43         ID2    0.316 0.074331246      0.12889946      0.5766606     2.451523
## 44         ID2    0.298 0.108388772      0.12889946      0.8408784     2.311879
## 45         ID2    0.252 0.232990506      0.12889946      1.8075366     1.955012
## 46         OPC    0.776 0.253117020      0.26483122      0.9557673     2.930168
## 47         OPC    0.772 0.161946818      0.26483122      0.6115095     2.915064
## 48         OPC    0.772 0.228098725      0.26483122      0.8612985     2.915064
## 49         OPC    0.756 0.477781092      0.26483122      1.8040965     2.854648
## 50         OPC    0.708 0.203212460      0.26483122      0.7673282     2.673401
## 51     L4_RORB    0.430 0.188160089      0.14113478      1.3331944     3.046733
## 52     L4_RORB    0.419 0.162399793      0.14113478      1.1506717     2.968793
## 53     L4_RORB    0.332 0.129877267      0.14113478      0.9202357     2.352361
## 54     L4_RORB    0.310 0.103232382      0.14113478      0.7314454     2.196482
## 55     L4_RORB    0.294 0.122004357      0.14113478      0.8644528     2.083115
## 56       Astro    0.902 0.284341729      0.36797676      0.7727165     2.451242
## 57       Astro    0.899 0.377946418      0.36797676      1.0270932     2.443089
## 58       Astro    0.882 0.529110936      0.36797676      1.4378922     2.396890
## 59       Astro    0.882 0.383148448      0.36797676      1.0412300     2.396890
## 60       Astro    0.882 0.265336290      0.36797676      0.7210681     2.396890
## 61   L2-3_CUX2    0.864 0.574445240      0.37610913      1.5273366     2.297206
## 62   L2-3_CUX2    0.814 0.510708144      0.37610913      1.3578723     2.164265
## 63   L2-3_CUX2    0.801 0.168966389      0.37610913      0.4492483     2.129701
## 64   L2-3_CUX2    0.769 0.332365311      0.37610913      0.8836938     2.044619
## 65   L2-3_CUX2    0.762 0.294060542      0.37610913      0.7818490     2.026008
## 66       Micro    0.862 0.551773050      0.47733393      1.1559477     1.805864
## 67       Micro    0.855 0.204894515      0.47733393      0.4292478     1.791199
## 68       Micro    0.851 0.597151577      0.47733393      1.2510143     1.782819
## 69       Micro    0.846 0.528429838      0.47733393      1.1070444     1.772344
## 70       Micro    0.839 0.504420661      0.47733393      1.0567459     1.757679
## 71       Oligo    0.964 0.772773240      0.67832324      1.1392404     1.421151
## 72       Oligo    0.953 0.752325759      0.67832324      1.1090963     1.404935
## 73       Oligo    0.946 0.661486352      0.67832324      0.9751787     1.394615
## 74       Oligo    0.946 0.602961620      0.67832324      0.8889001     1.394615
## 75       Oligo    0.945 0.602069216      0.67832324      0.8875845     1.393141
##    pct.filt.adj.mean
## 1          10.774046
## 2          10.774046
## 3          10.774046
## 4          10.774046
## 5          10.774046
## 6           8.486273
## 7           8.486273
## 8           8.486273
## 9           8.486273
## 10          8.486273
## 11          5.166476
## 12          5.166476
## 13          5.166476
## 14          5.166476
## 15          5.166476
## 16          5.060887
## 17          5.060887
## 18          5.060887
## 19          5.060887
## 20          5.060887
## 21          4.982375
## 22          4.982375
## 23          4.982375
## 24          4.982375
## 25          4.982375
## 26          4.606897
## 27          4.606897
## 28          4.606897
## 29          4.606897
## 30          4.606897
## 31          3.940975
## 32          3.940975
## 33          3.940975
## 34          3.940975
## 35          3.940975
## 36          3.462852
## 37          3.462852
## 38          3.462852
## 39          3.462852
## 40          3.462852
## 41          3.143535
## 42          3.143535
## 43          3.143535
## 44          3.143535
## 45          3.143535
## 46          2.857669
## 47          2.857669
## 48          2.857669
## 49          2.857669
## 50          2.857669
## 51          2.529497
## 52          2.529497
## 53          2.529497
## 54          2.529497
## 55          2.529497
## 56          2.417000
## 57          2.417000
## 58          2.417000
## 59          2.417000
## 60          2.417000
## 61          2.132360
## 62          2.132360
## 63          2.132360
## 64          2.132360
## 65          2.132360
## 66          1.781981
## 67          1.781981
## 68          1.781981
## 69          1.781981
## 70          1.781981
## 71          1.401692
## 72          1.401692
## 73          1.401692
## 74          1.401692
## 75          1.401692

plot

mat.use <- reshape2::melt(frame.pct.pos[,c(1,5,6)])
## Using cluster as id variables
(p <- ggbarplot(mat.use, x = "cluster", y = "value", add = c("mean_se"),color = "variable",fill = "white",palette = c("#fcdc57","dark red"),
          position = position_dodge(0.8)) + stat_compare_means(aes(group = variable), label = "p.signif",method = "t.test") + theme(axis.text.x = element_text(angle = 45,hjust = 1)))

2 Heart Data

2.1FilterMarker

out.heart.filt <- starTracer::filterMarker(x = heart_big,
                                           ident.use = "cell_type__ontology_label",
                                           heart_big.Seurat,
                                           num = 5)
frame.filt <- out.heart.filt$marker.frame
frame.filt <- select(frame.filt,cluster,gene, avg_log2FC, pct.pos)
frame.filt
## # A tibble: 65 × 4
##    cluster             gene       avg_log2FC pct.pos
##    <fct>               <chr>           <dbl>   <dbl>
##  1 cardiac muscle cell HSPB3           0.523   0.999
##  2 cardiac muscle cell HSPB7           1.16    0.998
##  3 cardiac muscle cell SMPX            1.12    0.998
##  4 cardiac muscle cell LINC01954       1.54    0.998
##  5 cardiac muscle cell AC002057.2      0.829   0.998
##  6 fat cell            PCK1            1.18    0.936
##  7 fat cell            ADIPOQ-AS1      1.06    0.933
##  8 fat cell            ADIPOQ          3.13    0.933
##  9 fat cell            CIDEC           1.77    0.909
## 10 fat cell            PLIN1           2.86    0.874
## # … with 55 more rows

2.2 Seurat

#including the top 20 genes of Seurat's result
gene.seurat <- subset(heart_big.Seurat,heart_big.Seurat$p_val_adj < 0.05)
gene.seurat <- arrange(gene.seurat,cluster,desc(avg_log2FC))
gene.seurat <- with(gene.seurat,by(gene.seurat[,"gene"],INDICES = cluster,FUN = function(x)head(x,20))) %>% unlist() %>% as.vector() %>% rev()

#generating the matrix of the marker genes
frame.seurat <- heart_big.Seurat[,c("cluster","gene","avg_log2FC")] %>% subset(heart_big.Seurat$gene %in% gene.seurat)
frame.seurat <- arrange(frame.seurat,gene,desc(avg_log2FC))
frame.seurat <- frame.seurat[!duplicated(frame.seurat$gene),] %>% arrange(cluster,desc(avg_log2FC))

#finding the top 5 marker genes in each cluster
frame.seurat <- frame.seurat %>%
  group_by(cluster) %>% #按照cluster列进行分组
  top_n(5, wt = avg_log2FC) %>% #按照FC列从大到小排序,选择每组前5行
  ungroup()
frame.seurat$cluster %>% table()
## .
##                    cardiac muscle cell                               fat cell 
##                                      5                                      5 
##                             macrophage                       endocardial cell 
##                                      5                                      5 
##           cardiac ventricle fibroblast               cardiac endothelial cell 
##                                      5                                      5 
##                         cardiac neuron   endothelial cell of lymphatic vessel 
##                                      5                                      5 
## vascular associated smooth muscle cell                          pericyte cell 
##                                      5                                      5 
##                              mast cell                             lymphocyte 
##                                      5                                      5 
##                       mesothelial cell 
##                                      5
#calculating the specificity level of Seurat (here we should use cal_pctpos, otherwise the results will be wrong.)
frame.seurat <- cal_pctpos(heart_big,ident.use = "cell_type__ontology_label",as.data.frame(frame.seurat))
## getting dgCMatrix from object
## calculating gathering result
## calculating expression info
## `summarise()` has grouped output by 'cell_type__ontology_label'. You can
## override using the `.groups` argument.
frame.seurat
##                                   cluster       gene avg_log2FC pct.pos
## 1                     cardiac muscle cell       RYR2   6.500125   0.811
## 2                     cardiac muscle cell        TTN   6.076185   0.786
## 3                     cardiac muscle cell      FGF12   5.598094   0.975
## 4                     cardiac muscle cell     SORBS2   5.399656   0.911
## 5                     cardiac muscle cell    TTN-AS1   5.261100   0.874
## 6                                fat cell       GPAM   4.456875   0.178
## 7                                fat cell      ACACB   3.768680   0.026
## 8                                fat cell      PDE3B   3.508428   0.033
## 9                                fat cell   TMEM132C   3.359539   0.130
## 10                               fat cell  LINC00598   3.357553   0.119
## 11                             macrophage      F13A1   5.491663   0.868
## 12                             macrophage       RBPJ   4.450230   0.304
## 13                             macrophage       MRC1   4.337762   0.847
## 14                             macrophage      RBM47   4.151512   0.884
## 15                             macrophage      CD163   3.969400   0.914
## 16                       endocardial cell       NRG1   5.035721   0.226
## 17                       endocardial cell  LINC02388   4.714143   0.131
## 18                       endocardial cell    PKHD1L1   4.565855   0.469
## 19                       endocardial cell AC020637.1   3.875186   0.131
## 20                       endocardial cell   ADAMTSL1   3.862765   0.138
## 21           cardiac ventricle fibroblast        DCN   4.270147   0.923
## 22           cardiac ventricle fibroblast      NEGR1   4.260118   0.874
## 23           cardiac ventricle fibroblast      ACSM3   4.129375   0.837
## 24           cardiac ventricle fibroblast      BICC1   3.953330   0.875
## 25           cardiac ventricle fibroblast      CDH19   3.678982   0.902
## 26               cardiac endothelial cell        VWF   4.349443   0.818
## 27               cardiac endothelial cell     PIK3R3   4.189585   0.511
## 28               cardiac endothelial cell       ANO2   4.101193   0.816
## 29               cardiac endothelial cell ST6GALNAC3   3.977434   0.705
## 30               cardiac endothelial cell       LDB2   3.829827   0.553
## 31                         cardiac neuron      NRXN1   6.993441   0.364
## 32                         cardiac neuron       XKR4   6.268396   0.450
## 33                         cardiac neuron      CADM2   4.854013   0.124
## 34                         cardiac neuron      NRXN3   4.552645   0.070
## 35                         cardiac neuron    KIRREL3   4.491211   0.136
## 36   endothelial cell of lymphatic vessel      MMRN1   3.917387   0.677
## 37   endothelial cell of lymphatic vessel     SEMA3A   3.486745   0.125
## 38   endothelial cell of lymphatic vessel AL357507.1   3.470161   0.075
## 39   endothelial cell of lymphatic vessel      GPM6A   3.462596   0.374
## 40   endothelial cell of lymphatic vessel       RELN   3.121926   0.223
## 41 vascular associated smooth muscle cell      MYH11   4.195502   0.255
## 42 vascular associated smooth muscle cell      ZFHX3   3.419977   0.061
## 43 vascular associated smooth muscle cell      NTRK3   3.373406   0.483
## 44 vascular associated smooth muscle cell      CARMN   3.344995   0.129
## 45 vascular associated smooth muscle cell      ITGA8   3.099863   0.307
## 46                          pericyte cell    GUCY1A2   4.052020   0.708
## 47                          pericyte cell     EGFLAM   3.922886   0.782
## 48                          pericyte cell       DLC1   3.618893   0.191
## 49                          pericyte cell       RGS5   3.606787   0.735
## 50                          pericyte cell      ABCC9   3.537733   0.307
## 51                              mast cell    SLC24A3   5.495358   0.110
## 52                              mast cell        NTM   5.430806   0.104
## 53                              mast cell        KIT   5.033447   0.510
## 54                              mast cell     IL18R1   5.010671   0.118
## 55                              mast cell       CPA3   4.540015   0.825
## 56                             lymphocyte      SKAP1   4.471021   0.670
## 57                             lymphocyte      PTPRC   4.008349   0.270
## 58                             lymphocyte     THEMIS   3.961787   0.833
## 59                             lymphocyte      CD247   3.934955   0.658
## 60                             lymphocyte       AOAH   3.676797   0.212
## 61                       mesothelial cell    SLC39A8   5.236098   0.020
## 62                       mesothelial cell         C3   4.702693   0.010
## 63                       mesothelial cell AC083837.1   4.676874   0.019
## 64                       mesothelial cell      CEMIP   4.277572   0.002
## 65                       mesothelial cell       WWC1   4.196452   0.053

2.3 Find difference in specificity

frame.pct.pos <- data.frame(cluster = frame.filt$cluster,pct.filt = frame.filt$pct.pos,pct.seurat = frame.seurat$pct.pos)
frame.pct.pos
##                                   cluster  pct.filt pct.seurat
## 1                     cardiac muscle cell 0.9988837      0.811
## 2                     cardiac muscle cell 0.9984708      0.786
## 3                     cardiac muscle cell 0.9984135      0.975
## 4                     cardiac muscle cell 0.9983669      0.911
## 5                     cardiac muscle cell 0.9982444      0.874
## 6                                fat cell 0.9355386      0.178
## 7                                fat cell 0.9326861      0.026
## 8                                fat cell 0.9325738      0.033
## 9                                fat cell 0.9094412      0.130
## 10                               fat cell 0.8738428      0.119
## 11                             macrophage 0.9737773      0.868
## 12                             macrophage 0.9684022      0.304
## 13                             macrophage 0.9626516      0.847
## 14                             macrophage 0.9553298      0.884
## 15                             macrophage 0.9531663      0.914
## 16                       endocardial cell 0.6039005      0.226
## 17                       endocardial cell 0.5087369      0.131
## 18                       endocardial cell 0.4689078      0.469
## 19                       endocardial cell 0.3619917      0.131
## 20                       endocardial cell 0.2995041      0.138
## 21           cardiac ventricle fibroblast 0.9737996      0.923
## 22           cardiac ventricle fibroblast 0.9631058      0.874
## 23           cardiac ventricle fibroblast 0.9600514      0.837
## 24           cardiac ventricle fibroblast 0.9579284      0.875
## 25           cardiac ventricle fibroblast 0.9550053      0.902
## 26               cardiac endothelial cell 0.8957628      0.818
## 27               cardiac endothelial cell 0.8909054      0.511
## 28               cardiac endothelial cell 0.8895384      0.816
## 29               cardiac endothelial cell 0.8832083      0.705
## 30               cardiac endothelial cell 0.8814115      0.553
## 31                         cardiac neuron 0.8373752      0.364
## 32                         cardiac neuron 0.7362110      0.450
## 33                         cardiac neuron 0.7007348      0.124
## 34                         cardiac neuron 0.6357616      0.070
## 35                         cardiac neuron 0.6068013      0.136
## 36   endothelial cell of lymphatic vessel 0.8060960      0.677
## 37   endothelial cell of lymphatic vessel 0.6768943      0.125
## 38   endothelial cell of lymphatic vessel 0.5981254      0.075
## 39   endothelial cell of lymphatic vessel 0.5739130      0.374
## 40   endothelial cell of lymphatic vessel 0.5004115      0.223
## 41 vascular associated smooth muscle cell 0.6832740      0.255
## 42 vascular associated smooth muscle cell 0.5640234      0.061
## 43 vascular associated smooth muscle cell 0.5102112      0.483
## 44 vascular associated smooth muscle cell 0.5021017      0.129
## 45 vascular associated smooth muscle cell 0.4935132      0.307
## 46                          pericyte cell 0.8628503      0.708
## 47                          pericyte cell 0.8292884      0.782
## 48                          pericyte cell 0.7894353      0.191
## 49                          pericyte cell 0.7815931      0.735
## 50                          pericyte cell 0.7810399      0.307
## 51                              mast cell 0.8498350      0.110
## 52                              mast cell 0.8251691      0.104
## 53                              mast cell 0.8151076      0.510
## 54                              mast cell 0.8107715      0.118
## 55                              mast cell 0.7859091      0.825
## 56                             lymphocyte 0.9072495      0.670
## 57                             lymphocyte 0.9023913      0.270
## 58                             lymphocyte 0.9000000      0.833
## 59                             lymphocyte 0.8822702      0.658
## 60                             lymphocyte 0.8645651      0.212
## 61                       mesothelial cell 0.5084746      0.020
## 62                       mesothelial cell 0.3604651      0.010
## 63                       mesothelial cell 0.2792793      0.019
## 64                       mesothelial cell 0.2764228      0.002
## 65                       mesothelial cell 0.2348112      0.053
#cal seurat.mean
frame.pct.pos$pct.seurat.mean <- rep(aggregate(pct.seurat ~ cluster, data = frame.pct.pos, FUN = mean)$pct.seurat,each = 5)

#divide seurat.mean to get star.adj and seurat.adj
frame.pct.pos$pct.seurat.adj <- frame.pct.pos$pct.seurat/frame.pct.pos$pct.seurat.mean
frame.pct.pos$pct.filt.adj <- frame.pct.pos$pct.filt/frame.pct.pos$pct.seurat.mean

#cal filt.adj.mean to arrange the plot
frame.pct.pos$pct.filt.adj.mean <- rep(aggregate(pct.filt.adj ~ cluster, data = frame.pct.pos, FUN = mean)$pct.filt.adj,each = 5)

#arrange
frame.pct.pos <- arrange(frame.pct.pos,desc(pct.filt.adj.mean))
frame.pct.pos$cluster <- factor(frame.pct.pos$cluster,levels = unique(frame.pct.pos$cluster))

visualize

mat.use <- reshape2::melt(frame.pct.pos[,c(1,5,6)])
## Using cluster as id variables
(p <- ggbarplot(mat.use, x = "cluster", y = "value", add = c("mean_se"),color = "variable",fill = "white",palette = c("#fcdc57","dark red"),
          position = position_dodge(0.8)) + stat_compare_means(aes(group = variable), label = "p.signif",method = "t.test") + theme(axis.text.x = element_text(angle = 45,hjust = 1)))

3 Kidney Data

3.1 FilterMarker

out.kidney.filt <- starTracer::filterMarker(x = sc_kidney,
                                           ident.use = "cell_type__ontology_label",
                                           out.kidney.Seurat,
                                           num = 5)
## getting dgCMatrix from object
## calculating gathering result
## calculating expression info
## We found 5 marker genes for cluster: proximal convoluted tubule,renal beta-intercalated cell,thick ascending limb of loop of Henle,proximal straight tubule,distal convoluted tubule,kidney interstitial cell,endothelial cell,renal connecting tubule,renal alpha-intercalated cell,renal principal cell,kidney epithelial cell,glomerular visceral epithelial cell,kidney tubule cell

frame.filt <- out.kidney.filt$marker.frame
frame.filt <- select(frame.filt,cluster,gene, avg_log2FC, pct.pos)
frame.filt
## # A tibble: 65 × 4
##    cluster                      gene    avg_log2FC pct.pos
##    <fct>                        <chr>        <dbl>   <dbl>
##  1 proximal convoluted tubule   Bnc2         0.646   0.832
##  2 proximal convoluted tubule   Etv1         0.500   0.819
##  3 proximal convoluted tubule   Cml3         0.533   0.812
##  4 proximal convoluted tubule   Slc13a3      0.729   0.804
##  5 proximal convoluted tubule   G6pc         0.600   0.799
##  6 renal beta-intercalated cell Hmx2         1.15    0.825
##  7 renal beta-intercalated cell Insrr        2.09    0.779
##  8 renal beta-intercalated cell Slc26a4      2.70    0.653
##  9 renal beta-intercalated cell Gm12121      1.07    0.635
## 10 renal beta-intercalated cell Slc4a9       2.45    0.578
## # … with 55 more rows

3.2 Seurat

#including the top 20 genes of Seurat's result
gene.seurat <- subset(out.kidney.Seurat,out.kidney.Seurat$p_val_adj < 0.05)
gene.seurat <- arrange(gene.seurat,cluster,desc(avg_log2FC))
gene.seurat <- with(gene.seurat,by(gene.seurat[,"gene"],INDICES = cluster,FUN = function(x)head(x,300))) %>% unlist() %>% as.vector() %>% rev()

#generating the matrix of the marker genes
frame.seurat <- out.kidney.Seurat[,c("cluster","gene","avg_log2FC")] %>% subset(out.kidney.Seurat$gene %in% gene.seurat)
frame.seurat <- arrange(frame.seurat,gene,desc(avg_log2FC))
frame.seurat <- frame.seurat[!duplicated(frame.seurat$gene),] %>% arrange(cluster,desc(avg_log2FC))

#finding the top 5 marker genes in each cluster
frame.seurat <- frame.seurat %>%
  group_by(cluster) %>% #按照cluster列进行分组
  top_n(5, wt = avg_log2FC) %>% #按照FC列从大到小排序,选择每组前5行
  ungroup()
frame.seurat$cluster %>% table()
## .
##            proximal convoluted tubule          renal beta-intercalated cell 
##                                     5                                     5 
## thick ascending limb of loop of Henle              proximal straight tubule 
##                                     5                                     5 
##              distal convoluted tubule              kidney interstitial cell 
##                                     5                                     5 
##                      endothelial cell               renal connecting tubule 
##                                     5                                     5 
##         renal alpha-intercalated cell                  renal principal cell 
##                                     5                                     5 
##                kidney epithelial cell   glomerular visceral epithelial cell 
##                                     0                                     5 
##                    kidney tubule cell 
##                                     5
#calculating the specificity level of Seurat
frame.seurat <- cal_spe(sc_kidney,ident.use = "cell_type__ontology_label",as.data.frame(frame.seurat))
frame.seurat
##                                  cluster     gene avg_log2FC      pct.pos
## 1             proximal convoluted tubule     Ass1  0.7977545 0.7523092713
## 2             proximal convoluted tubule   Gm5424  0.7340004 0.7615974183
## 3             proximal convoluted tubule  Slc13a3  0.7285109 0.8040141677
## 4             proximal convoluted tubule    Glis1  0.7120794 0.7881794651
## 5             proximal convoluted tubule   Cyp4b1  0.7001688 0.7400981997
## 6           renal beta-intercalated cell  Slc26a4  2.7000022 0.6527027027
## 7           renal beta-intercalated cell   Slc4a9  2.4505567 0.5775656325
## 8           renal beta-intercalated cell    Lsamp  2.0880429 0.1700000000
## 9           renal beta-intercalated cell    Insrr  2.0850130 0.7788697789
## 10          renal beta-intercalated cell  Tmem117  1.8914783 0.1769369369
## 11 thick ascending limb of loop of Henle  Slc12a1  1.3675012 0.5090491970
## 12 thick ascending limb of loop of Henle      Egf  1.0717783 0.3401506997
## 13 thick ascending limb of loop of Henle    Erbb4  1.0377491 0.3278769841
## 14 thick ascending limb of loop of Henle    Enox1  0.9822033 0.5253496503
## 15 thick ascending limb of loop of Henle     Casr  0.9247946 0.5616438356
## 16              proximal straight tubule   Cyp7b1  1.1658098 0.4406374502
## 17              proximal straight tubule    Mep1b  0.7591253 0.6064441887
## 18              proximal straight tubule     Fgf1  0.7208294 0.2814417178
## 19              proximal straight tubule    Rnf24  0.7052920 0.4003868472
## 20              proximal straight tubule    Aadat  0.6237110 0.3095836826
## 21              distal convoluted tubule  Slc12a3  1.7719445 0.3400576369
## 22              distal convoluted tubule    Trpm6  1.7316988 0.5338582677
## 23              distal convoluted tubule   Abca13  1.3102502 0.3137053227
## 24              distal convoluted tubule    Klhl3  1.2023035 0.3884436946
## 25              distal convoluted tubule  Tsc22d1  1.1199377 0.3112607557
## 26              kidney interstitial cell      Cfh  1.9680627 0.4752475248
## 27              kidney interstitial cell    Tshz2  1.8469893 0.2668896321
## 28              kidney interstitial cell   Gm2163  1.6857648 0.6218678815
## 29              kidney interstitial cell    Prkg1  1.5105461 0.1964711493
## 30              kidney interstitial cell    Pde3a  1.4663921 0.5903083700
## 31                      endothelial cell   Adgrl4  1.5675646 0.4009900990
## 32                      endothelial cell    Meis2  1.5094758 0.2144955926
## 33                      endothelial cell     Emcn  1.4978285 0.3277486911
## 34                      endothelial cell    Plpp1  1.4946007 0.3046471601
## 35                      endothelial cell     Flt1  1.4042938 0.2503546099
## 36               renal connecting tubule   Slc8a1  1.7988526 0.3149466192
## 37               renal connecting tubule    Calb1  1.5095695 0.4119749777
## 38               renal connecting tubule  Phactr1  1.4856703 0.3815468114
## 39               renal connecting tubule AI838599  1.4195251 0.5042313117
## 40               renal connecting tubule   Slc2a9  1.2771562 0.5478901541
## 41         renal alpha-intercalated cell     Aqp6  1.7513498 0.6518218623
## 42         renal alpha-intercalated cell    Rcan2  1.6175144 0.2441860465
## 43         renal alpha-intercalated cell      Kit  1.4067170 0.6533864542
## 44         renal alpha-intercalated cell     Clnk  1.3893767 0.3756476684
## 45         renal alpha-intercalated cell   Bmpr1b  1.3761364 0.1600566572
## 46                  renal principal cell   Mgat4c  2.0599192 0.3336244541
## 47                  renal principal cell   Frmpd4  1.7582055 0.2063197026
## 48                  renal principal cell    Fxyd4  1.7439609 0.3766859345
## 49                  renal principal cell   Scnn1b  1.3854806 0.2283950617
## 50                  renal principal cell  Col26a1  1.2879773 0.4856512141
## 51   glomerular visceral epithelial cell   Srgap1  2.6162360 0.0785219400
## 52   glomerular visceral epithelial cell    Synpo  2.5551251 0.1868131868
## 53   glomerular visceral epithelial cell    Robo2  2.5189427 0.0384615385
## 54   glomerular visceral epithelial cell  Gm29266  2.4039946 0.0567375887
## 55   glomerular visceral epithelial cell   Thsd7a  2.3832154 0.0426439232
## 56                    kidney tubule cell     Ispd  1.2946169 0.0006788866
## 57                    kidney tubule cell    Thsd4  1.2907358 0.0021436227
## 58                    kidney tubule cell    Ttll6  1.2614032 0.0011217050
## 59                    kidney tubule cell     Ier2  1.2465328 0.0000000000
## 60                    kidney tubule cell   Abca16  1.2165080 0.0000000000

3.3 Find DIfference in Specificity

#we should notice that by assigning genes into clusters using the value of avg_log2FC, we have 0 marker genes for epithelial cells

frame.filt <- subset(frame.filt,frame.filt$cluster %in% frame.seurat$cluster)

frame.pct.pos <- data.frame(cluster = frame.filt$cluster,pct.filt = frame.filt$pct.pos,pct.seurat = frame.seurat$pct.pos)
frame.pct.pos
##                                  cluster pct.filt   pct.seurat
## 1             proximal convoluted tubule    0.832 0.7523092713
## 2             proximal convoluted tubule    0.819 0.7615974183
## 3             proximal convoluted tubule    0.812 0.8040141677
## 4             proximal convoluted tubule    0.804 0.7881794651
## 5             proximal convoluted tubule    0.799 0.7400981997
## 6           renal beta-intercalated cell    0.825 0.6527027027
## 7           renal beta-intercalated cell    0.779 0.5775656325
## 8           renal beta-intercalated cell    0.653 0.1700000000
## 9           renal beta-intercalated cell    0.635 0.7788697789
## 10          renal beta-intercalated cell    0.578 0.1769369369
## 11 thick ascending limb of loop of Henle    0.697 0.5090491970
## 12 thick ascending limb of loop of Henle    0.619 0.3401506997
## 13 thick ascending limb of loop of Henle    0.562 0.3278769841
## 14 thick ascending limb of loop of Henle    0.562 0.5253496503
## 15 thick ascending limb of loop of Henle    0.552 0.5616438356
## 16              proximal straight tubule    0.606 0.4406374502
## 17              proximal straight tubule    0.457 0.6064441887
## 18              proximal straight tubule    0.441 0.2814417178
## 19              proximal straight tubule    0.428 0.4003868472
## 20              proximal straight tubule    0.418 0.3095836826
## 21              distal convoluted tubule    0.534 0.3400576369
## 22              distal convoluted tubule    0.528 0.5338582677
## 23              distal convoluted tubule    0.461 0.3137053227
## 24              distal convoluted tubule    0.433 0.3884436946
## 25              distal convoluted tubule    0.404 0.3112607557
## 26              kidney interstitial cell    0.668 0.4752475248
## 27              kidney interstitial cell    0.622 0.2668896321
## 28              kidney interstitial cell    0.598 0.6218678815
## 29              kidney interstitial cell    0.590 0.1964711493
## 30              kidney interstitial cell    0.530 0.5903083700
## 31                      endothelial cell    0.405 0.4009900990
## 32                      endothelial cell    0.401 0.2144955926
## 33                      endothelial cell    0.399 0.3277486911
## 34                      endothelial cell    0.365 0.3046471601
## 35                      endothelial cell    0.334 0.2503546099
## 36               renal connecting tubule    0.548 0.3149466192
## 37               renal connecting tubule    0.504 0.4119749777
## 38               renal connecting tubule    0.496 0.3815468114
## 39               renal connecting tubule    0.495 0.5042313117
## 40               renal connecting tubule    0.412 0.5478901541
## 41         renal alpha-intercalated cell    0.653 0.6518218623
## 42         renal alpha-intercalated cell    0.652 0.2441860465
## 43         renal alpha-intercalated cell    0.376 0.6533864542
## 44         renal alpha-intercalated cell    0.331 0.3756476684
## 45         renal alpha-intercalated cell    0.247 0.1600566572
## 46                  renal principal cell    0.486 0.3336244541
## 47                  renal principal cell    0.377 0.2063197026
## 48                  renal principal cell    0.334 0.3766859345
## 49                  renal principal cell    0.311 0.2283950617
## 50                  renal principal cell    0.299 0.4856512141
## 51   glomerular visceral epithelial cell    0.515 0.0785219400
## 52   glomerular visceral epithelial cell    0.395 0.1868131868
## 53   glomerular visceral epithelial cell    0.261 0.0384615385
## 54   glomerular visceral epithelial cell    0.187 0.0567375887
## 55   glomerular visceral epithelial cell    0.154 0.0426439232
## 56                    kidney tubule cell    0.014 0.0006788866
## 57                    kidney tubule cell    0.013 0.0021436227
## 58                    kidney tubule cell    0.012 0.0011217050
## 59                    kidney tubule cell    0.011 0.0000000000
## 60                    kidney tubule cell    0.009 0.0000000000
#cal seurat.mean
frame.pct.pos$pct.seurat.mean <- rep(aggregate(pct.seurat ~ cluster, data = frame.pct.pos, FUN = mean)$pct.seurat,each = 5)

#divide seurat.mean to get star.adj and seurat.adj
frame.pct.pos$pct.seurat.adj <- frame.pct.pos$pct.seurat/frame.pct.pos$pct.seurat.mean
frame.pct.pos$pct.filt.adj <- frame.pct.pos$pct.filt/frame.pct.pos$pct.seurat.mean

#cal filt.adj.mean to arrange the plot
frame.pct.pos$pct.filt.adj.mean <- rep(aggregate(pct.filt.adj ~ cluster, data = frame.pct.pos, FUN = mean)$pct.filt.adj,each = 5)

#arrange
frame.pct.pos <- arrange(frame.pct.pos,desc(pct.filt.adj.mean))
frame.pct.pos$cluster <- factor(frame.pct.pos$cluster,levels = unique(frame.pct.pos$cluster))

visualize

mat.use <- reshape2::melt(frame.pct.pos[,c(1,5,6)])
## Using cluster as id variables
(p <- ggbarplot(mat.use, x = "cluster", y = "value", add = c("mean_se"),color = "variable",fill = "white",palette = c("#fcdc57","dark red"),
          position = position_dodge(0.8)) + stat_compare_means(aes(group = variable), label = "p.signif",method = "t.test") + theme(axis.text.x = element_text(angle = 45,hjust = 1)))

5 plot of PN neurons

plot

DimPlot(snRNA_PN,cols = rev(viridis::mako(length(unique(snRNA_PN$sub_clust)))))

5.1 Find Marker genes from Seurat

Idents(snRNA_PN) <- snRNA_PN$sub_clust
out.markers.PN.seurat <- FindAllMarkers(snRNA_PN,only.pos = T,logfc.threshold = 0.5)
head(out.markers.PN.seurat)
##          p_val avg_log2FC pct.1 pct.2 p_val_adj       cluster     gene
## Serpine2     0   1.604872 0.823 0.372         0 L2_CUX2_LAMP5 Serpine2
## Cbln2        0   1.570557 0.978 0.660         0 L2_CUX2_LAMP5    Cbln2
## Pdzd2        0   1.355468 0.993 0.856         0 L2_CUX2_LAMP5    Pdzd2
## Gnal         0   1.287271 0.866 0.455         0 L2_CUX2_LAMP5     Gnal
## Rasgrf2      0   1.236217 0.968 0.737         0 L2_CUX2_LAMP5  Rasgrf2
## Cux2         0   1.190806 0.977 0.562         0 L2_CUX2_LAMP5     Cux2

arrange data

# select genes with p less than 0.05
gene.seurat <- subset(out.markers.PN.seurat,out.markers.PN.seurat$p_val_adj < 0.05)

# arrange genes according to log2FC
gene.seurat <- arrange(gene.seurat,cluster,desc(avg_log2FC))$gene

gene.seurat %>% head()
## [1] "Serpine2" "Cbln2"    "Pdzd2"    "Thsd7a"   "Gnal"     "Rasgrf2"

modify data

# create data for plotting
frame.seurat <- out.markers.PN.seurat[,c("cluster","gene","avg_log2FC")] %>% subset(out.markers.PN.seurat$gene %in% gene.seurat)
frame.seurat <- arrange(frame.seurat,gene,desc(avg_log2FC))
frame.seurat <- frame.seurat[!duplicated(frame.seurat$gene),] %>% arrange(cluster,desc(avg_log2FC))

# for comparing data
frame.seurat <- frame.seurat %>%
  group_by(cluster) %>%
  top_n(5, wt = avg_log2FC) %>% 
  ungroup()

# for plotting data
frame.seurat.3 <- frame.seurat %>%
  group_by(cluster) %>%
  top_n(3, wt = avg_log2FC) %>%
  ungroup()

frame.seurat$cluster %>% table()
## .
##     L2_CUX2_LAMP5  L5-6_TLE4_SORCS1    L3_CUX2_PRSS12  L5-6_THEMIS_CNR1 
##                 5                 5                 5                 5 
##  L5-6_TLE4_SCUBE1   L5-6_TLE4_HTR2C L5-6_THEMIS_NTNG2     L4_RORB_LRRK1 
##                 5                 5                 5                 5 
##       L4_RORB_MET     L4_RORB_dev-1       L4_RORB_MME   L2-3_CUX2_dev-1 
##                 5                 5                 5                 5 
##   L2-3_CUX2_dev-5   L2-3_CUX2_dev-3 L2_CUX2_LAMP5_dev 
##                 5                 5                 5
frame.seurat$cluster %>% table() %>% table()
## .
##  5 
## 15

plot

colors.gradient <- c("#13c4cd","#ffffff","#b91192")
(p <- DotPlot(
    snRNA_PN,
    assay = "RNA",
    features = rev(frame.seurat.3$gene),
    #cols = c("white", "red"), #replaced with scale color function
    col.min = -2.5,
    col.max = 2.5,
    dot.min = 0,
    dot.scale = 2,
    idents = NULL,
    group.by = NULL,
    split.by = NULL,
    cluster.idents = FALSE,
    scale = TRUE,
    scale.by = "radius",
    scale.min = NA,
    scale.max = NA
  ) + coord_flip() + scale_colour_gradient2(low = colors.gradient[1],mid = colors.gradient[2], high = colors.gradient[3]) +
    ggthemes::theme_few() + theme(axis.text = element_text(size = 8),axis.ticks = element_blank(),axis.text.x = element_text(angle = 45, hjust = 1)))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

5.2 FilterMarker

out.marker.PN.filt <- starTracer::filterMarker(x = snRNA_PN,
                                           ident.use = "sub_clust",
                                           out.markers.PN.seurat,
                                           num = 3)
## getting dgCMatrix from object
## calculating gathering result
## calculating expression info
## We found 3 marker genes for cluster: L2_CUX2_LAMP5,L5-6_TLE4_SORCS1,L3_CUX2_PRSS12,L5-6_THEMIS_CNR1,L5-6_TLE4_SCUBE1,L5-6_TLE4_HTR2C,L5-6_THEMIS_NTNG2,L4_RORB_LRRK1,L4_RORB_MET,L4_RORB_dev-1,L4_RORB_MME,L2-3_CUX2_dev-1,L2-3_CUX2_dev-5,L2-3_CUX2_dev-3,L2_CUX2_LAMP5_dev

out.marker.PN.filt.res <- out.marker.PN.filt$marker.frame
colors.gradient <- c("#1ca9d5","#ffffff","#ea905e")
(p <- DotPlot(
    snRNA_PN,
    assay = "RNA",
    features = rev(out.marker.PN.filt$marker.frame$gene),
    #cols = c("white", "red"), #replaced with scale color function
    col.min = -2.5,
    col.max = 2.5,
    dot.min = 0,
    dot.scale = 2,
    idents = NULL,
    group.by = NULL,
    split.by = NULL,
    cluster.idents = FALSE,
    scale = TRUE,
    scale.by = "radius",
    scale.min = NA,
    scale.max = NA
  ) + coord_flip() + scale_colour_gradient2(low = colors.gradient[1],mid = colors.gradient[2], high = colors.gradient[3]) +
    ggthemes::theme_few() + theme(axis.text = element_text(size = 8),axis.ticks = element_blank(),axis.text.x = element_text(angle = 45, hjust = 1)))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

#6 plot with PFC

6.1 calculate load data

Idents(snRNA.hpfc.sct) <- snRNA.hpfc.sct$major_clust
out.marker.PFC.filt <- starTracer::filterMarker(x = snRNA.hpfc.sct,
                                           ident.use = "sub_clust",
                                           out.brain.Seurat,
                                           num = 3)
## getting dgCMatrix from object
## calculating gathering result
## calculating expression info
## We found 3 marker genes for cluster: Astro,Oligo,LAMP5_NOS1,OPC,L2-3_CUX2,L5-6_TLE4,Micro,L5-6_THEMIS,SST,PV_SCUBE3,VIP,ID2,PV,L4_RORB,Vas
## starTracer found 3 marker genes in cluster: Astro,Oligo,LAMP5_NOS1,OPC,L2-3_CUX2,L5-6_TLE4,Micro,L5-6_THEMIS,SST,PV_SCUBE3,VIP,ID2,PV,L4_RORB,Vas, for clusters PN_dev starTracer found 0 marker genes.

out.marker.PFC.filt.res <- out.marker.PFC.filt$marker.frame
colors.gradient <- c("#1ca9d5","#ffffff","#ea905e")
(p <- DotPlot(
    snRNA.hpfc.sct,
    assay = "RNA",
    features = rev(out.marker.PFC.filt$marker.frame$gene),
    #cols = c("white", "red"), #replaced with scale color function
    col.min = -2.5,
    col.max = 2.5,
    dot.min = 0,
    dot.scale = 2,
    idents = NULL,
    group.by = NULL,
    split.by = NULL,
    cluster.idents = FALSE,
    scale = TRUE,
    scale.by = "radius",
    scale.min = NA,
    scale.max = NA
  ) + coord_flip() + scale_colour_gradient2(low = colors.gradient[1],mid = colors.gradient[2], high = colors.gradient[3]) +
    ggthemes::theme_few() + theme(axis.text = element_text(size = 8),axis.ticks = element_blank(),axis.text.x = element_text(angle = 45, hjust = 1)))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

6.2 specificity level

create filtermaker result and seurat result

seurat

gene.seurat <- subset(out.brain.Seurat,out.brain.Seurat$p_val_adj < 0.05)

gene.seurat <- arrange(gene.seurat,cluster,desc(avg_log2FC))$gene

gene.seurat %>% head()
## [1] "Slc1a2" "Gpc5"   "Hpse2"  "Slc4a4" "Slc1a3" "Col5a3"
frame.seurat <- out.brain.Seurat[,c("cluster","gene","avg_log2FC")] %>% subset(out.brain.Seurat$gene %in% gene.seurat)
frame.seurat <- arrange(frame.seurat,gene,desc(avg_log2FC))
frame.seurat <- frame.seurat[!duplicated(frame.seurat$gene),] %>% arrange(cluster,desc(avg_log2FC))

frame.seurat <- frame.seurat %>%
  group_by(cluster) %>% #按照cluster列进行分组
  top_n(5, wt = avg_log2FC) %>% #按照FC列从大到小排序,选择每组前2行
  ungroup()

frame.seurat$cluster %>% table()
## .
##       Astro       Oligo  LAMP5_NOS1         OPC   L2-3_CUX2   L5-6_TLE4 
##           5           5           5           5           5           5 
##       Micro L5-6_THEMIS         SST   PV_SCUBE3         VIP         ID2 
##           5           5           5           5           5           5 
##          PV     L4_RORB         Vas      PN_dev 
##           5           5           5           0
frame.seurat$cluster %>% table() %>% table()
## .
##  0  5 
##  1 15
frame.seurat <- frame.seurat[,c("cluster","gene")]
frame.seurat <- cal_spe(snRNA.hpfc.sct,ident.use = "major_clust",as.data.frame(frame.seurat))

for star

out.marker.PFC.filt.res <- starTracer::filterMarker(x = snRNA.hpfc.sct,
                                           ident.use = "sub_clust",
                                           out.brain.Seurat,
                                           num = 5)$marker.frame
## getting dgCMatrix from object
## calculating gathering result
## calculating expression info
## We found 5 marker genes for cluster: Astro,Oligo,LAMP5_NOS1,OPC,L2-3_CUX2,L5-6_TLE4,Micro,L5-6_THEMIS,SST,PV_SCUBE3,VIP,ID2,PV,L4_RORB,Vas
## starTracer found 5 marker genes in cluster: Astro,Oligo,LAMP5_NOS1,OPC,L2-3_CUX2,L5-6_TLE4,Micro,L5-6_THEMIS,SST,PV_SCUBE3,VIP,ID2,PV,L4_RORB,Vas, for clusters PN_dev starTracer found 0 marker genes.

frame.star <- out.marker.PFC.filt.res[,c("cluster","gene")]
frame.star <- cal_spe(snRNA.hpfc.sct,ident.use = "major_clust",as.data.frame(frame.star))

frame.star <- subset(frame.star,frame.star$cluster %in% frame.seurat$cluster)
identical(frame.star$cluster,frame.seurat$cluster)
## [1] TRUE

compare

frame.pct.pos <- data.frame(cluster = frame.star$cluster,pct.star = frame.star$pct.pos,pct.seurat = frame.seurat$pct.pos)

#cal seurat.mean
frame.pct.pos$pct.seurat.mean <- rep(aggregate(pct.seurat ~ cluster, data = frame.pct.pos, FUN = mean)$pct.seurat,each = 5)

#divide seurat.mean to get star.adk and seurat.adj
frame.pct.pos$pct.seurat.adj <- frame.pct.pos$pct.seurat/frame.pct.pos$pct.seurat.mean
frame.pct.pos$pct.star.adj <- frame.pct.pos$pct.star/frame.pct.pos$pct.seurat.mean

#cal star.adj.mean to arrange the plot
frame.pct.pos$pct.star.adj.mean <- rep(aggregate(pct.star.adj ~ cluster, data = frame.pct.pos, FUN = mean)$pct.star.adj,each = 5)

#arrange
frame.pct.pos <- arrange(frame.pct.pos,desc(pct.star.adj.mean))
frame.pct.pos$cluster <- factor(frame.pct.pos$cluster,levels = unique(frame.pct.pos$cluster))

p <- c()
foldchange <- c()
for (i in unique(frame.pct.pos$cluster)){
  mat <- subset(frame.pct.pos,cluster == i)
  t1 <- mat$pct.star
  t2 <- mat$pct.seurat
  p.i <- t.test(t1,t2)$p.value
  foldchange.i <- mean(mat$pct.star)/mean(mat$pct.seurat)

  foldchange <- append(foldchange,foldchange.i)
  p <- append(p,p.i)
}
frame.pct.pos.stat <- data.frame(cluster = unique(frame.pct.pos$cluster),p = p, foldchange = foldchange)


mat.use <- reshape2::melt(frame.pct.pos[,c(1,5,6)])
## Using cluster as id variables
(p <- ggbarplot(mat.use, x = "cluster", y = "value", add = c("mean_se"),color = "variable",fill = "white",palette = c("#fcdc57","blue"),
          position = position_dodge(0.8)) + stat_compare_means(aes(group = variable), label = "p.signif") + theme(axis.text.x = element_text(angle = 45,hjust = 1)))

save