To analysis the performance of starTracer at different annotation levels, we use the PFC data, with different annotation data:

  1. BioType:Neuron(Exc, Inh),Astro,Micro…

  2. Major_clust:Layer cluster: L2/3 ET…

  3. Sub_clsut: L2/3 ET-1; L2/3 ET-2…

1 load data

UMAP plot of the data from Human Prefrontal Cortex

Idents(snRNA.hpfc.sct) <- snRNA.hpfc.sct$major_clust
DimPlot(snRNA.hpfc.sct,group.by = "major_clust") + scale_color_manual(values = viridis::viridis(length(unique(Idents(snRNA.hpfc.sct)))))

Different Cluster Levels

The first level is the BioType, includes astrocyes, oligodendroglia, microglias, OPC, projection neuron and Inter-neuron.

to view on UMAP

Idents(snRNA.hpfc.sct) <- snRNA.hpfc.sct$bio_clust
p <- DimPlot(snRNA.hpfc.sct,group.by = "bio_clust") + scale_color_manual(values = viridis::rocket(length(unique(Idents(snRNA.hpfc.sct))))) + theme_void()
p

the second level is the major_clust

Idents(snRNA.hpfc.sct) <- snRNA.hpfc.sct$major_clust
p <- DimPlot(snRNA.hpfc.sct,group.by = "major_clust") + scale_color_manual(values = viridis::viridis(length(unique(Idents(snRNA.hpfc.sct))))) + theme_void()
p

the third level is sub_clust

Idents(snRNA.hpfc.sct) <- snRNA.hpfc.sct$sub_clust
p <- DimPlot(snRNA.hpfc.sct,group.by = "sub_clust") + scale_color_manual(values = viridis::viridis(80)[10:76]) + theme_void()
p

2 Bio_type level

starTracer’s result

starTracer’s result, we set thresh.1 = 0.5

#here we will calculate 2 results, find the top 5 and top 3 marker genes, respectively.
#Top 5 marker genes will be used to measure the specificity:
Idents(snRNA.hpfc.sct) <- snRNA.hpfc.sct$bio_clust
out.Star.Biotye <- starTracer::searchMarker(snRNA.hpfc.sct,thresh.1 = 0.5,thresh.2 = 0.5,method = "del_MI",num = 5,gene.use = "HVG")
## using Astro, Oligo, IN, OPC, PN, Micro as ident...
## using HVG as input features...
## max normalizing the matirx...
## screen genes in each cluster according to thresh.2
## calculating parameters...
## Using "RNA" as the assay to plot Heatmap...
## createing out put data...
## Analysing Complete!

#Top 3 marker genes will be used to draw dot plot:
out.Star.Biotye.3 <- starTracer::searchMarker(snRNA.hpfc.sct,thresh.1 = 0.5,thresh.2 = 0.5,method = "del_MI",num = 3,gene.use = "HVG")
## using Astro, Oligo, IN, OPC, PN, Micro as ident...
## using HVG as input features...
## max normalizing the matirx...
## screen genes in each cluster according to thresh.2
## calculating parameters...
## Using "RNA" as the assay to plot Heatmap...
## createing out put data...
## Analysing Complete!

dotPlot

Idents(snRNA.hpfc.sct) <- snRNA.hpfc.sct$bio_clust
(p <- use_dotplot(snRNA.hpfc.sct,features = out.Star.Biotye.3$genes.markers, colors.gradient = c("#018daa","#ffffff","#ed463a")))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

Seurat’s result

Idents(snRNA.hpfc.sct) <- snRNA.hpfc.sct$bio_clust
out.Seurat.Biotye <- FindAllMarkers(snRNA.hpfc.sct,logfc.threshold = 0.5,only.pos = T)

We identify marker genes following the same criteria as in the previous analysis:

  1. Find marker genes using FindAllMarkers function.
  2. Subset the matrix by setting p-value < 0.05
  3. re-arrange the data.frame by cluster and descending average log2(Fold-Change)
  4. Keep the top 10 marker gene in each cluster
gene.seurat <- subset(out.Seurat.Biotye,out.Seurat.Biotye$p_val_adj < 0.05)
gene.seurat <- arrange(gene.seurat,cluster,desc(avg_log2FC))
gene.seurat <- with(gene.seurat,by(gene.seurat[,"gene"],INDICES = cluster,FUN = function(x)head(x,10))) %>% unlist() %>% as.vector() %>% rev()

gene.seurat %>% head()
## [1] "Cyfip1"  "Csf1r"   "Slco2b1" "Sfmbt2"  "Tbxas1"  "Inpp5d"

We will further include the top 5 marker genes in each cluster.

frame.seurat <- out.Seurat.Biotye[,c("cluster","gene","avg_log2FC")] %>% subset(out.Seurat.Biotye$gene %in% gene.seurat)
frame.seurat <- arrange(frame.seurat,gene,desc(avg_log2FC))
frame.seurat <- frame.seurat[!duplicated(frame.seurat$gene),] %>% arrange(cluster,desc(avg_log2FC))

frame.seurat <- frame.seurat %>%
  group_by(cluster) %>% #按照cluster列进行分组
  top_n(5, wt = avg_log2FC) %>% #按照FC列从大到小排序,选择每组前2行
  ungroup()

#this is for plotting
frame.seurat.3 <- frame.seurat %>%
  group_by(cluster) %>% #按照cluster列进行分组
  top_n(3, wt = avg_log2FC) %>% #按照FC列从大到小排序,选择每组前2行
  ungroup()
frame.seurat$cluster %>% table() 
## .
## Astro Oligo    IN   OPC    PN Micro 
##     5     5     5     5     5     5

we found 5 genes in each of the clusters, thus we plot a DotPlot

(p <- use_dotplot(snRNA.hpfc.sct,features = rev(frame.seurat.3$gene), colors.gradient = c("#13c4cd","#ffffff","#b91192")))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

ggsave("./out/f4_levels/A_bioclust/seurat.dotplot.3genes.pdf",p,width = 4,height = 5)

Specificity Level

Ti is defined as

\[ T_i=\frac{\mathrm{card}\ \left(\mathrm{G}_\mathrm{i}\mathrm{\cap}\ \mathrm{G}_{\mathrm{clust}}\right)}{\mathrm{card}\left(\mathrm{G}_\mathrm{i}\right)} \]

Where:

Gi represents the set of the cells expressing gene i in-silico.

Gclust represents the set of cells where gene i serves as the in-silico marker gene.

frame.seurat <- frame.seurat[,c("cluster","gene")]
frame.seurat <- cal_spe(snRNA.hpfc.sct,ident.use = "bio_clust",as.data.frame(frame.seurat))

frame.star <- subset(out.Star.Biotye$para_frame,out.Star.Biotye$para_frame$gene %in% out.Star.Biotye$genes.markers)[,c("max.X","gene")]
colnames(frame.star)[1] <- "cluster"
frame.star <- cal_spe(snRNA.hpfc.sct,ident.use = "bio_clust",frame.star)

frame.star <- subset(frame.star,frame.star$cluster %in% frame.seurat$cluster)
identical(frame.star$cluster,frame.seurat$cluster)

frame.pct.pos <- data.frame(cluster = frame.star$cluster,pct.star = frame.star$pct.pos,pct.seurat = frame.seurat$pct.pos)

#cal seurat.mean
frame.pct.pos$pct.seurat.mean <- rep(aggregate(pct.seurat ~ cluster, data = frame.pct.pos, FUN = mean)$pct.seurat,each = 5)

#divide seurat.mean to get star.adk and seurat.adj
frame.pct.pos$pct.seurat.adj <- frame.pct.pos$pct.seurat/frame.pct.pos$pct.seurat.mean
frame.pct.pos$pct.star.adj <- frame.pct.pos$pct.star/frame.pct.pos$pct.seurat.mean

#cal star.adj.mean to arrange the plot
frame.pct.pos$pct.star.adj.mean <- rep(aggregate(pct.star.adj ~ cluster, data = frame.pct.pos, FUN = mean)$pct.star.adj,each = 5)

#arrange
frame.pct.pos <- arrange(frame.pct.pos,desc(pct.star.adj.mean))
frame.pct.pos$cluster <- factor(frame.pct.pos$cluster,levels = unique(frame.pct.pos$cluster))

p <- c()
foldchange <- c()
for (i in unique(frame.pct.pos$cluster)){
  mat <- subset(frame.pct.pos,cluster == i)
  t1 <- mat$pct.star
  t2 <- mat$pct.seurat
  p.i <- t.test(t1,t2)$p.value
  foldchange.i <- mean(mat$pct.star)/mean(mat$pct.seurat)

  foldchange <- append(foldchange,foldchange.i)
  p <- append(p,p.i)
}
frame.pct.pos.stat <- data.frame(cluster = unique(frame.pct.pos$cluster),p = p, foldchange = foldchange)

visualize

mat.use <- reshape2::melt(frame.pct.pos[,c(1,5,6)])
## Using cluster as id variables
(p <- ggbarplot(mat.use, x = "cluster", y = "value", add = c("mean_se"),color = "variable",fill = "white",palette = c("#fcdc57","blue"),
          position = position_dodge(0.8)) + stat_compare_means(aes(group = variable), label = "p.signif") + theme(axis.text.x = element_text(angle = 45,hjust = 1)))

3 Major_clust Level

starTracer’s result

starTracer’s result, we use thresh.1 = 0.5

Idents(snRNA.hpfc.sct) <- snRNA.hpfc.sct$major_clust
out.Star.Major <- starTracer::searchMarker(snRNA.hpfc.sct,thresh.1 = 0.5,thresh.2 = 0.3,method = "del_MI",num = 5,gene.use = "HVG")
## using Astro, Oligo, LAMP5_NOS1, OPC, L2-3_CUX2, L5-6_TLE4, Micro, L5-6_THEMIS, SST, PV_SCUBE3, VIP, ID2, PV, L4_RORB, Vas, PN_dev as ident...
## using HVG as input features...
## max normalizing the matirx...
## screen genes in each cluster according to thresh.2
## calculating parameters...
## Using "RNA" as the assay to plot Heatmap...
## createing out put data...
## Analysing Complete!

out.Star.Major.3 <- starTracer::searchMarker(snRNA.hpfc.sct,thresh.1 = 0.5,thresh.2 = 0.3,method = "del_MI",num = 3,gene.use = "HVG")
## using Astro, Oligo, LAMP5_NOS1, OPC, L2-3_CUX2, L5-6_TLE4, Micro, L5-6_THEMIS, SST, PV_SCUBE3, VIP, ID2, PV, L4_RORB, Vas, PN_dev as ident...
## using HVG as input features...
## max normalizing the matirx...
## screen genes in each cluster according to thresh.2
## calculating parameters...
## Using "RNA" as the assay to plot Heatmap...
## createing out put data...
## Analysing Complete!

dotPlot

Idents(snRNA.hpfc.sct) <- snRNA.hpfc.sct$major_clust
(p <- use_dotplot(snRNA.hpfc.sct,features = out.Star.Major.3$genes.markers, colors.gradient = c("#018daa","#ffffff","#ed463a"),dot.scale = 2))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

Seurat’s result

Idents(snRNA.hpfc.sct) <- snRNA.hpfc.sct$major_clust
out.Seurat.Major <- FindAllMarkers(snRNA.hpfc.sct,logfc.threshold = 0.5,only.pos = T)

marker genes of Seurat (please check previous chapters for more detailed explainations)

gene.seurat <- subset(out.Seurat.Major,out.Seurat.Major$p_val_adj < 0.05)
gene.seurat <- arrange(gene.seurat,cluster,desc(avg_log2FC))
gene.seurat <- with(gene.seurat,by(gene.seurat[,"gene"],INDICES = cluster,FUN = function(x)head(x,10))) %>% unlist() %>% as.vector() %>% rev()

gene.seurat %>% head()
## [1] "Arhgap29" "Rbms3"    "Lama2"    "Itih5"    "Abcb1b"   "Atp10a"

we found more than 5 genes as the marker genes found in Seurat might be shared by several clusters.

frame.seurat <- out.Seurat.Major[,c("cluster","gene","avg_log2FC")] %>% subset(out.Seurat.Major$gene %in% gene.seurat)
frame.seurat <- arrange(frame.seurat,gene,desc(avg_log2FC))
frame.seurat <- frame.seurat[!duplicated(frame.seurat$gene),] %>% arrange(cluster,desc(avg_log2FC))

frame.seurat <- frame.seurat %>%
  group_by(cluster) %>% #按照cluster列进行分组
  top_n(5, wt = avg_log2FC) %>% #按照FC列从大到小排序,选择每组前2行
  ungroup()
frame.seurat$cluster %>% table() 
## .
##       Astro       Oligo  LAMP5_NOS1         OPC   L2-3_CUX2   L5-6_TLE4 
##           5           5           5           5           5           5 
##       Micro L5-6_THEMIS         SST   PV_SCUBE3         VIP         ID2 
##           5           5           5           5           5           5 
##          PV     L4_RORB         Vas      PN_dev 
##           5           5           5           0
frame.seurat.3 <- frame.seurat %>%
  group_by(cluster) %>% #按照cluster列进行分组
  top_n(3, wt = avg_log2FC) %>% #按照FC列从大到小排序,选择每组前2行
  ungroup()

we found 5 genes in each of the clusters, thus we plot a DotPlot

(p <- use_dotplot(snRNA.hpfc.sct,features = rev(frame.seurat.3$gene), colors.gradient = c("#13c4cd","#ffffff","#b91192"),dot.scale = 2))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

ggsave("./out/f4_levels/B_majorclust/seurat.dotplot.3genes.pdf",p,width = 4,height = 5)

Specificity Level

frame.seurat <- frame.seurat[,c("cluster","gene")]
frame.seurat <- cal_spe(snRNA.hpfc.sct,ident.use = "major_clust",as.data.frame(frame.seurat))

frame.star <- subset(out.Star.Major$para_frame,out.Star.Major$para_frame$gene %in% out.Star.Major$genes.markers)[,c("max.X","gene")]
colnames(frame.star)[1] <- "cluster"
frame.star <- cal_spe(snRNA.hpfc.sct,ident.use = "major_clust",frame.star)

frame.star <- subset(frame.star,frame.star$cluster %in% frame.seurat$cluster)
identical(frame.star$cluster,frame.seurat$cluster)

frame.pct.pos <- data.frame(cluster = frame.star$cluster,pct.star = frame.star$pct.pos,pct.seurat = frame.seurat$pct.pos)

#cal seurat.mean
frame.pct.pos$pct.seurat.mean <- rep(aggregate(pct.seurat ~ cluster, data = frame.pct.pos, FUN = mean)$pct.seurat,each = 5)

#divide seurat.mean to get star.adk and seurat.adj
frame.pct.pos$pct.seurat.adj <- frame.pct.pos$pct.seurat/frame.pct.pos$pct.seurat.mean
frame.pct.pos$pct.star.adj <- frame.pct.pos$pct.star/frame.pct.pos$pct.seurat.mean

#cal star.adj.mean to arrange the plot
frame.pct.pos$pct.star.adj.mean <- rep(aggregate(pct.star.adj ~ cluster, data = frame.pct.pos, FUN = mean)$pct.star.adj,each = 5)

#arrange
frame.pct.pos <- arrange(frame.pct.pos,desc(pct.star.adj.mean))
frame.pct.pos$cluster <- factor(frame.pct.pos$cluster,levels = unique(frame.pct.pos$cluster))

p <- c()
foldchange <- c()
for (i in unique(frame.pct.pos$cluster)){
  mat <- subset(frame.pct.pos,cluster == i)
  t1 <- mat$pct.star
  t2 <- mat$pct.seurat
  p.i <- t.test(t1,t2)$p.value
  foldchange.i <- mean(mat$pct.star)/mean(mat$pct.seurat)

  foldchange <- append(foldchange,foldchange.i)
  p <- append(p,p.i)
}
frame.pct.pos.stat <- data.frame(cluster = unique(frame.pct.pos$cluster),p = p, foldchange = foldchange)

visualize

mat.use <- reshape2::melt(frame.pct.pos[,c(1,5,6)])
## Using cluster as id variables
(p <- ggbarplot(mat.use, x = "cluster", y = "value", add = c("mean_se"),color = "variable",fill = "white",palette = c("#fcdc57","blue"),
          position = position_dodge(0.8)) + stat_compare_means(aes(group = variable), label = "p.signif") + theme(axis.text.x = element_text(angle = 45,hjust = 1)))

save plot

4 Sub_clust Level

starTracer’s result

As there will be too many marker genes, we decide to only show the top 2 marker genes in each cluster instead of top 3.

Idents(snRNA.hpfc.sct) <- snRNA.hpfc.sct$sub_clust
out.Star.Sub <- starTracer::searchMarker(snRNA.hpfc.sct,thresh.1 = 0.5,thresh.2 = 0.2,method = "del_MI",num = 5,gene.use = NULL)
## using Astro_GFAP, Oligo-2, Oligo-3, Oligo-1, LAMP5_NOS1, OPC, Oligo-6, L2_CUX2_LAMP5, L5-6_TLE4_SORCS1, Micro, L3_CUX2_PRSS12, L5-6_THEMIS_CNR1, SST_TH, Astro_SLC1A2, Oligo-4, SST_STK32A, Oligo_mat, OPC_MBP, SST_ADGRG6, L5-6_TLE4_SCUBE1, PV_SCUBE3, Oligo-5, VIP_ABI3BP, VIP_CRH, SST_B3GAT2, L5-6_TLE4_HTR2C, ID2_dev, L5-6_THEMIS_NTNG2, SST_CALB1_dev, PV_WFDC2, L4_RORB_LRRK1, VIP_KIRREL3, PV_SULF1, L4_RORB_MET, VIP_ADAMTSL1, PV_SULF1_dev, VIP_CHRM2, Micro_out, LAMP5_CCK, Vas_PDGFRB, SST_BRINP3, L4_RORB_dev-1, L4_RORB_MME, L2-3_CUX2_dev-1, VIP_DPP6, VIP_HS3ST3A1, Vas_CLDN5, VIP_PCDH20, Vas_TBX18, PV_SST, SST_NPY, CCK_SYT6, CCK_SORCS1, PN_dev, SST_CALB1, CCK_RELN, LAMP5_NDNF, ID2_CSMD1, L2-3_CUX2_dev-5, L2-3_CUX2_dev-3, SST_ADGRG6_dev, OPC_dev, L2_CUX2_LAMP5_dev, Astro_SLC1A2_dev, Oligo-7, Astro_dev-4 as ident...
## using all genes as input features...
## max normalizing the matirx...
## screen genes in each cluster according to thresh.2
## calculating parameters...
## Using "RNA" as the assay to plot Heatmap...
## createing out put data...
## Analysing Complete!

out.Star.Sub.2 <- starTracer::searchMarker(snRNA.hpfc.sct,thresh.1 = 0.5,thresh.2 = 0.2,method = "del_MI",num = 3,gene.use = NULL)
## using Astro_GFAP, Oligo-2, Oligo-3, Oligo-1, LAMP5_NOS1, OPC, Oligo-6, L2_CUX2_LAMP5, L5-6_TLE4_SORCS1, Micro, L3_CUX2_PRSS12, L5-6_THEMIS_CNR1, SST_TH, Astro_SLC1A2, Oligo-4, SST_STK32A, Oligo_mat, OPC_MBP, SST_ADGRG6, L5-6_TLE4_SCUBE1, PV_SCUBE3, Oligo-5, VIP_ABI3BP, VIP_CRH, SST_B3GAT2, L5-6_TLE4_HTR2C, ID2_dev, L5-6_THEMIS_NTNG2, SST_CALB1_dev, PV_WFDC2, L4_RORB_LRRK1, VIP_KIRREL3, PV_SULF1, L4_RORB_MET, VIP_ADAMTSL1, PV_SULF1_dev, VIP_CHRM2, Micro_out, LAMP5_CCK, Vas_PDGFRB, SST_BRINP3, L4_RORB_dev-1, L4_RORB_MME, L2-3_CUX2_dev-1, VIP_DPP6, VIP_HS3ST3A1, Vas_CLDN5, VIP_PCDH20, Vas_TBX18, PV_SST, SST_NPY, CCK_SYT6, CCK_SORCS1, PN_dev, SST_CALB1, CCK_RELN, LAMP5_NDNF, ID2_CSMD1, L2-3_CUX2_dev-5, L2-3_CUX2_dev-3, SST_ADGRG6_dev, OPC_dev, L2_CUX2_LAMP5_dev, Astro_SLC1A2_dev, Oligo-7, Astro_dev-4 as ident...
## using all genes as input features...
## max normalizing the matirx...
## screen genes in each cluster according to thresh.2
## calculating parameters...
## Using "RNA" as the assay to plot Heatmap...
## createing out put data...
## Analysing Complete!

dotPlot, as some parameters are changed, we don’t directly use use_dotplot functionl.

colors.gradient = c("#018daa","#ffffff","#ed463a")
(p <- DotPlot(
    snRNA.hpfc.sct,
    assay = "RNA",
    features = out.Star.Sub.2$genes.markers,
    #cols = c("white", "red"), #replaced with scale color function
    col.min = -2.5,
    col.max = 2.5,
    dot.min = 0,
    dot.scale = 2,
    idents = NULL,
    group.by = NULL,
    split.by = NULL,
    cluster.idents = FALSE,
    scale = TRUE,
    scale.by = "radius",
    scale.min = NA,
    scale.max = NA
  ) + coord_flip() + scale_colour_gradient2(low = colors.gradient[1],mid = colors.gradient[2], high = colors.gradient[3]) +
    ggthemes::theme_few() + theme(axis.text = element_text(size = 8),axis.ticks = element_blank(),axis.text.x = element_text(angle = 45, hjust = 1)))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

Seurat’s result

Idents(snRNA.hpfc.sct) <- snRNA.hpfc.sct$sub_clust
out.Seurat.Sub <- FindAllMarkers(snRNA.hpfc.sct,logfc.threshold = 0.5,only.pos = T)

marker genes of Seurat

gene.seurat <- subset(out.Seurat.Sub,out.Seurat.Sub$p_val_adj < 0.05)

gene.seurat <- arrange(gene.seurat,cluster,desc(avg_log2FC))$gene
#gene.seurat <- with(gene.seurat,by(gene.seurat[,"gene"],INDICES = cluster,FUN = function(x)head(x,30))) %>% unlist() %>% as.vector() %>% rev()

gene.seurat %>% head()
## [1] "Gfap"    "Dpp10"   "Tnc"     "Aqp4"    "Kcnn3"   "Sparcl1"

We then remove the duplicated marker genes

frame.seurat <- out.Seurat.Sub[,c("cluster","gene","avg_log2FC")] %>% subset(out.Seurat.Sub$gene %in% gene.seurat)
frame.seurat <- arrange(frame.seurat,gene,desc(avg_log2FC))
frame.seurat <- frame.seurat[!duplicated(frame.seurat$gene),] %>% arrange(cluster,desc(avg_log2FC))

frame.seurat <- frame.seurat %>%
  group_by(cluster) %>% #按照cluster列进行分组
  top_n(5, wt = avg_log2FC) %>% #按照FC列从大到小排序,选择每组前2行
  ungroup()

#this is for plota
frame.seurat.2 <- frame.seurat %>%
  group_by(cluster) %>% #按照cluster列进行分组
  top_n(2, wt = avg_log2FC) %>% #按照FC列从大到小排序,选择每组前2行
  ungroup()

we found 5 genes in almost all the clusters, but for Astro_dev-4, PN_dev and Oligo-7, we failed to identify marker genes in it.

colors.gradient <- c("#13c4cd","#ffffff","#b91192")
(p <- DotPlot(
    snRNA.hpfc.sct,
    assay = "RNA",
    features = rev(frame.seurat.2$gene),
    #cols = c("white", "red"), #replaced with scale color function
    col.min = -2.5,
    col.max = 2.5,
    dot.min = 0,
    dot.scale = 2,
    idents = NULL,
    group.by = NULL,
    split.by = NULL,
    cluster.idents = FALSE,
    scale = TRUE,
    scale.by = "radius",
    scale.min = NA,
    scale.max = NA
  ) + coord_flip() + scale_colour_gradient2(low = colors.gradient[1],mid = colors.gradient[2], high = colors.gradient[3]) +
    ggthemes::theme_few() + theme(axis.text = element_text(size = 8),axis.ticks = element_blank(),axis.text.x = element_text(angle = 45, hjust = 1)))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

Specificity Level

frame.seurat <- frame.seurat[,c("cluster","gene")]
frame.seurat <- cal_spe(snRNA.hpfc.sct,ident.use = "sub_clust",as.data.frame(frame.seurat))

frame.star <- subset(out.Star.Sub$para_frame,out.Star.Sub$para_frame$gene %in% out.Star.Sub$genes.markers)[,c("max.X","gene")]
colnames(frame.star)[1] <- "cluster"
frame.star <- cal_spe(snRNA.hpfc.sct,ident.use = "sub_clust",frame.star)

frame.star <- subset(frame.star,frame.star$cluster %in% frame.seurat$cluster)
identical(frame.star$cluster,frame.seurat$cluster)

frame.pct.pos <- data.frame(cluster = frame.star$cluster,pct.star = frame.star$pct.pos,pct.seurat = frame.seurat$pct.pos)

#cal seurat.mean
frame.pct.pos$pct.seurat.mean <- rep(aggregate(pct.seurat ~ cluster, data = frame.pct.pos, FUN = mean)$pct.seurat,each = 5)

#divide seurat.mean to get star.adk and seurat.adj
frame.pct.pos$pct.seurat.adj <- frame.pct.pos$pct.seurat/frame.pct.pos$pct.seurat.mean
frame.pct.pos$pct.star.adj <- frame.pct.pos$pct.star/frame.pct.pos$pct.seurat.mean

#cal star.adj.mean to arrange the plot
frame.pct.pos$pct.star.adj.mean <- rep(aggregate(pct.star.adj ~ cluster, data = frame.pct.pos, FUN = mean)$pct.star.adj,each = 5)

#arrange
frame.pct.pos <- arrange(frame.pct.pos,desc(pct.star.adj.mean))
frame.pct.pos$cluster <- factor(frame.pct.pos$cluster,levels = unique(frame.pct.pos$cluster))

p <- c()
foldchange <- c()
for (i in unique(frame.pct.pos$cluster)){
  mat <- subset(frame.pct.pos,cluster == i)
  t1 <- mat$pct.star
  t2 <- mat$pct.seurat
  p.i <- t.test(t1,t2)$p.value
  foldchange.i <- mean(mat$pct.star)/mean(mat$pct.seurat)

  foldchange <- append(foldchange,foldchange.i)
  p <- append(p,p.i)
}
frame.pct.pos.stat <- data.frame(cluster = unique(frame.pct.pos$cluster),p = p, foldchange = foldchange)
mat.use <- reshape2::melt(frame.pct.pos[,c(1,5,6)])
## Using cluster as id variables
(p <- ggbarplot(mat.use, x = "cluster", y = "value", add = c("mean_se"),color = "variable",fill = "white",palette = c("#fcdc57","blue"),
          position = position_dodge(0.8)) + stat_compare_means(aes(group = variable), label = "p.signif") + theme(axis.text.x = element_text(angle = 45,hjust = 1,size = 3)))