To analysis the performance of starTracer at different annotation levels, we use the PFC data, with different annotation data:
BioType:Neuron(Exc, Inh),Astro,Micro…
Major_clust:Layer cluster: L2/3 ET…
Sub_clsut: L2/3 ET-1; L2/3 ET-2…
Idents(snRNA.hpfc.sct) <- snRNA.hpfc.sct$major_clust
DimPlot(snRNA.hpfc.sct,group.by = "major_clust") + scale_color_manual(values = viridis::viridis(length(unique(Idents(snRNA.hpfc.sct)))))
The first level is the BioType, includes astrocyes, oligodendroglia, microglias, OPC, projection neuron and Inter-neuron.
to view on UMAP
Idents(snRNA.hpfc.sct) <- snRNA.hpfc.sct$bio_clust
p <- DimPlot(snRNA.hpfc.sct,group.by = "bio_clust") + scale_color_manual(values = viridis::rocket(length(unique(Idents(snRNA.hpfc.sct))))) + theme_void()
p
the second level is the major_clust
Idents(snRNA.hpfc.sct) <- snRNA.hpfc.sct$major_clust
p <- DimPlot(snRNA.hpfc.sct,group.by = "major_clust") + scale_color_manual(values = viridis::viridis(length(unique(Idents(snRNA.hpfc.sct))))) + theme_void()
p
the third level is sub_clust
Idents(snRNA.hpfc.sct) <- snRNA.hpfc.sct$sub_clust
p <- DimPlot(snRNA.hpfc.sct,group.by = "sub_clust") + scale_color_manual(values = viridis::viridis(80)[10:76]) + theme_void()
p
starTracer’s result, we set thresh.1 = 0.5
#here we will calculate 2 results, find the top 5 and top 3 marker genes, respectively.
#Top 5 marker genes will be used to measure the specificity:
Idents(snRNA.hpfc.sct) <- snRNA.hpfc.sct$bio_clust
out.Star.Biotye <- starTracer::searchMarker(snRNA.hpfc.sct,thresh.1 = 0.5,thresh.2 = 0.5,method = "del_MI",num = 5,gene.use = "HVG")
## using Astro, Oligo, IN, OPC, PN, Micro as ident...
## using HVG as input features...
## max normalizing the matirx...
## screen genes in each cluster according to thresh.2
## calculating parameters...
## Using "RNA" as the assay to plot Heatmap...
## createing out put data...
## Analysing Complete!
#Top 3 marker genes will be used to draw dot plot:
out.Star.Biotye.3 <- starTracer::searchMarker(snRNA.hpfc.sct,thresh.1 = 0.5,thresh.2 = 0.5,method = "del_MI",num = 3,gene.use = "HVG")
## using Astro, Oligo, IN, OPC, PN, Micro as ident...
## using HVG as input features...
## max normalizing the matirx...
## screen genes in each cluster according to thresh.2
## calculating parameters...
## Using "RNA" as the assay to plot Heatmap...
## createing out put data...
## Analysing Complete!
dotPlot
Idents(snRNA.hpfc.sct) <- snRNA.hpfc.sct$bio_clust
(p <- use_dotplot(snRNA.hpfc.sct,features = out.Star.Biotye.3$genes.markers, colors.gradient = c("#018daa","#ffffff","#ed463a")))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
Idents(snRNA.hpfc.sct) <- snRNA.hpfc.sct$bio_clust
out.Seurat.Biotye <- FindAllMarkers(snRNA.hpfc.sct,logfc.threshold = 0.5,only.pos = T)
We identify marker genes following the same criteria as in the previous analysis:
gene.seurat <- subset(out.Seurat.Biotye,out.Seurat.Biotye$p_val_adj < 0.05)
gene.seurat <- arrange(gene.seurat,cluster,desc(avg_log2FC))
gene.seurat <- with(gene.seurat,by(gene.seurat[,"gene"],INDICES = cluster,FUN = function(x)head(x,10))) %>% unlist() %>% as.vector() %>% rev()
gene.seurat %>% head()
## [1] "Cyfip1" "Csf1r" "Slco2b1" "Sfmbt2" "Tbxas1" "Inpp5d"
We will further include the top 5 marker genes in each cluster.
frame.seurat <- out.Seurat.Biotye[,c("cluster","gene","avg_log2FC")] %>% subset(out.Seurat.Biotye$gene %in% gene.seurat)
frame.seurat <- arrange(frame.seurat,gene,desc(avg_log2FC))
frame.seurat <- frame.seurat[!duplicated(frame.seurat$gene),] %>% arrange(cluster,desc(avg_log2FC))
frame.seurat <- frame.seurat %>%
group_by(cluster) %>% #按照cluster列进行分组
top_n(5, wt = avg_log2FC) %>% #按照FC列从大到小排序,选择每组前2行
ungroup()
#this is for plotting
frame.seurat.3 <- frame.seurat %>%
group_by(cluster) %>% #按照cluster列进行分组
top_n(3, wt = avg_log2FC) %>% #按照FC列从大到小排序,选择每组前2行
ungroup()
frame.seurat$cluster %>% table()
## .
## Astro Oligo IN OPC PN Micro
## 5 5 5 5 5 5
we found 5 genes in each of the clusters, thus we plot a DotPlot
(p <- use_dotplot(snRNA.hpfc.sct,features = rev(frame.seurat.3$gene), colors.gradient = c("#13c4cd","#ffffff","#b91192")))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
ggsave("./out/f4_levels/A_bioclust/seurat.dotplot.3genes.pdf",p,width = 4,height = 5)
Ti is defined as
\[ T_i=\frac{\mathrm{card}\ \left(\mathrm{G}_\mathrm{i}\mathrm{\cap}\ \mathrm{G}_{\mathrm{clust}}\right)}{\mathrm{card}\left(\mathrm{G}_\mathrm{i}\right)} \]
Where:
Gi represents the set of the cells expressing gene i in-silico.
Gclust represents the set of cells where gene i serves as the in-silico marker gene.
frame.seurat <- frame.seurat[,c("cluster","gene")]
frame.seurat <- cal_spe(snRNA.hpfc.sct,ident.use = "bio_clust",as.data.frame(frame.seurat))
frame.star <- subset(out.Star.Biotye$para_frame,out.Star.Biotye$para_frame$gene %in% out.Star.Biotye$genes.markers)[,c("max.X","gene")]
colnames(frame.star)[1] <- "cluster"
frame.star <- cal_spe(snRNA.hpfc.sct,ident.use = "bio_clust",frame.star)
frame.star <- subset(frame.star,frame.star$cluster %in% frame.seurat$cluster)
identical(frame.star$cluster,frame.seurat$cluster)
frame.pct.pos <- data.frame(cluster = frame.star$cluster,pct.star = frame.star$pct.pos,pct.seurat = frame.seurat$pct.pos)
#cal seurat.mean
frame.pct.pos$pct.seurat.mean <- rep(aggregate(pct.seurat ~ cluster, data = frame.pct.pos, FUN = mean)$pct.seurat,each = 5)
#divide seurat.mean to get star.adk and seurat.adj
frame.pct.pos$pct.seurat.adj <- frame.pct.pos$pct.seurat/frame.pct.pos$pct.seurat.mean
frame.pct.pos$pct.star.adj <- frame.pct.pos$pct.star/frame.pct.pos$pct.seurat.mean
#cal star.adj.mean to arrange the plot
frame.pct.pos$pct.star.adj.mean <- rep(aggregate(pct.star.adj ~ cluster, data = frame.pct.pos, FUN = mean)$pct.star.adj,each = 5)
#arrange
frame.pct.pos <- arrange(frame.pct.pos,desc(pct.star.adj.mean))
frame.pct.pos$cluster <- factor(frame.pct.pos$cluster,levels = unique(frame.pct.pos$cluster))
p <- c()
foldchange <- c()
for (i in unique(frame.pct.pos$cluster)){
mat <- subset(frame.pct.pos,cluster == i)
t1 <- mat$pct.star
t2 <- mat$pct.seurat
p.i <- t.test(t1,t2)$p.value
foldchange.i <- mean(mat$pct.star)/mean(mat$pct.seurat)
foldchange <- append(foldchange,foldchange.i)
p <- append(p,p.i)
}
frame.pct.pos.stat <- data.frame(cluster = unique(frame.pct.pos$cluster),p = p, foldchange = foldchange)
visualize
mat.use <- reshape2::melt(frame.pct.pos[,c(1,5,6)])
## Using cluster as id variables
(p <- ggbarplot(mat.use, x = "cluster", y = "value", add = c("mean_se"),color = "variable",fill = "white",palette = c("#fcdc57","blue"),
position = position_dodge(0.8)) + stat_compare_means(aes(group = variable), label = "p.signif") + theme(axis.text.x = element_text(angle = 45,hjust = 1)))
starTracer’s result, we use thresh.1 = 0.5
Idents(snRNA.hpfc.sct) <- snRNA.hpfc.sct$major_clust
out.Star.Major <- starTracer::searchMarker(snRNA.hpfc.sct,thresh.1 = 0.5,thresh.2 = 0.3,method = "del_MI",num = 5,gene.use = "HVG")
## using Astro, Oligo, LAMP5_NOS1, OPC, L2-3_CUX2, L5-6_TLE4, Micro, L5-6_THEMIS, SST, PV_SCUBE3, VIP, ID2, PV, L4_RORB, Vas, PN_dev as ident...
## using HVG as input features...
## max normalizing the matirx...
## screen genes in each cluster according to thresh.2
## calculating parameters...
## Using "RNA" as the assay to plot Heatmap...
## createing out put data...
## Analysing Complete!
out.Star.Major.3 <- starTracer::searchMarker(snRNA.hpfc.sct,thresh.1 = 0.5,thresh.2 = 0.3,method = "del_MI",num = 3,gene.use = "HVG")
## using Astro, Oligo, LAMP5_NOS1, OPC, L2-3_CUX2, L5-6_TLE4, Micro, L5-6_THEMIS, SST, PV_SCUBE3, VIP, ID2, PV, L4_RORB, Vas, PN_dev as ident...
## using HVG as input features...
## max normalizing the matirx...
## screen genes in each cluster according to thresh.2
## calculating parameters...
## Using "RNA" as the assay to plot Heatmap...
## createing out put data...
## Analysing Complete!
dotPlot
Idents(snRNA.hpfc.sct) <- snRNA.hpfc.sct$major_clust
(p <- use_dotplot(snRNA.hpfc.sct,features = out.Star.Major.3$genes.markers, colors.gradient = c("#018daa","#ffffff","#ed463a"),dot.scale = 2))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
Idents(snRNA.hpfc.sct) <- snRNA.hpfc.sct$major_clust
out.Seurat.Major <- FindAllMarkers(snRNA.hpfc.sct,logfc.threshold = 0.5,only.pos = T)
marker genes of Seurat (please check previous chapters for more detailed explainations)
gene.seurat <- subset(out.Seurat.Major,out.Seurat.Major$p_val_adj < 0.05)
gene.seurat <- arrange(gene.seurat,cluster,desc(avg_log2FC))
gene.seurat <- with(gene.seurat,by(gene.seurat[,"gene"],INDICES = cluster,FUN = function(x)head(x,10))) %>% unlist() %>% as.vector() %>% rev()
gene.seurat %>% head()
## [1] "Arhgap29" "Rbms3" "Lama2" "Itih5" "Abcb1b" "Atp10a"
we found more than 5 genes as the marker genes found in Seurat might be shared by several clusters.
frame.seurat <- out.Seurat.Major[,c("cluster","gene","avg_log2FC")] %>% subset(out.Seurat.Major$gene %in% gene.seurat)
frame.seurat <- arrange(frame.seurat,gene,desc(avg_log2FC))
frame.seurat <- frame.seurat[!duplicated(frame.seurat$gene),] %>% arrange(cluster,desc(avg_log2FC))
frame.seurat <- frame.seurat %>%
group_by(cluster) %>% #按照cluster列进行分组
top_n(5, wt = avg_log2FC) %>% #按照FC列从大到小排序,选择每组前2行
ungroup()
frame.seurat$cluster %>% table()
## .
## Astro Oligo LAMP5_NOS1 OPC L2-3_CUX2 L5-6_TLE4
## 5 5 5 5 5 5
## Micro L5-6_THEMIS SST PV_SCUBE3 VIP ID2
## 5 5 5 5 5 5
## PV L4_RORB Vas PN_dev
## 5 5 5 0
frame.seurat.3 <- frame.seurat %>%
group_by(cluster) %>% #按照cluster列进行分组
top_n(3, wt = avg_log2FC) %>% #按照FC列从大到小排序,选择每组前2行
ungroup()
we found 5 genes in each of the clusters, thus we plot a DotPlot
(p <- use_dotplot(snRNA.hpfc.sct,features = rev(frame.seurat.3$gene), colors.gradient = c("#13c4cd","#ffffff","#b91192"),dot.scale = 2))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
ggsave("./out/f4_levels/B_majorclust/seurat.dotplot.3genes.pdf",p,width = 4,height = 5)
frame.seurat <- frame.seurat[,c("cluster","gene")]
frame.seurat <- cal_spe(snRNA.hpfc.sct,ident.use = "major_clust",as.data.frame(frame.seurat))
frame.star <- subset(out.Star.Major$para_frame,out.Star.Major$para_frame$gene %in% out.Star.Major$genes.markers)[,c("max.X","gene")]
colnames(frame.star)[1] <- "cluster"
frame.star <- cal_spe(snRNA.hpfc.sct,ident.use = "major_clust",frame.star)
frame.star <- subset(frame.star,frame.star$cluster %in% frame.seurat$cluster)
identical(frame.star$cluster,frame.seurat$cluster)
frame.pct.pos <- data.frame(cluster = frame.star$cluster,pct.star = frame.star$pct.pos,pct.seurat = frame.seurat$pct.pos)
#cal seurat.mean
frame.pct.pos$pct.seurat.mean <- rep(aggregate(pct.seurat ~ cluster, data = frame.pct.pos, FUN = mean)$pct.seurat,each = 5)
#divide seurat.mean to get star.adk and seurat.adj
frame.pct.pos$pct.seurat.adj <- frame.pct.pos$pct.seurat/frame.pct.pos$pct.seurat.mean
frame.pct.pos$pct.star.adj <- frame.pct.pos$pct.star/frame.pct.pos$pct.seurat.mean
#cal star.adj.mean to arrange the plot
frame.pct.pos$pct.star.adj.mean <- rep(aggregate(pct.star.adj ~ cluster, data = frame.pct.pos, FUN = mean)$pct.star.adj,each = 5)
#arrange
frame.pct.pos <- arrange(frame.pct.pos,desc(pct.star.adj.mean))
frame.pct.pos$cluster <- factor(frame.pct.pos$cluster,levels = unique(frame.pct.pos$cluster))
p <- c()
foldchange <- c()
for (i in unique(frame.pct.pos$cluster)){
mat <- subset(frame.pct.pos,cluster == i)
t1 <- mat$pct.star
t2 <- mat$pct.seurat
p.i <- t.test(t1,t2)$p.value
foldchange.i <- mean(mat$pct.star)/mean(mat$pct.seurat)
foldchange <- append(foldchange,foldchange.i)
p <- append(p,p.i)
}
frame.pct.pos.stat <- data.frame(cluster = unique(frame.pct.pos$cluster),p = p, foldchange = foldchange)
visualize
mat.use <- reshape2::melt(frame.pct.pos[,c(1,5,6)])
## Using cluster as id variables
(p <- ggbarplot(mat.use, x = "cluster", y = "value", add = c("mean_se"),color = "variable",fill = "white",palette = c("#fcdc57","blue"),
position = position_dodge(0.8)) + stat_compare_means(aes(group = variable), label = "p.signif") + theme(axis.text.x = element_text(angle = 45,hjust = 1)))
save plot
As there will be too many marker genes, we decide to only show the top 2 marker genes in each cluster instead of top 3.
Idents(snRNA.hpfc.sct) <- snRNA.hpfc.sct$sub_clust
out.Star.Sub <- starTracer::searchMarker(snRNA.hpfc.sct,thresh.1 = 0.5,thresh.2 = 0.2,method = "del_MI",num = 5,gene.use = NULL)
## using Astro_GFAP, Oligo-2, Oligo-3, Oligo-1, LAMP5_NOS1, OPC, Oligo-6, L2_CUX2_LAMP5, L5-6_TLE4_SORCS1, Micro, L3_CUX2_PRSS12, L5-6_THEMIS_CNR1, SST_TH, Astro_SLC1A2, Oligo-4, SST_STK32A, Oligo_mat, OPC_MBP, SST_ADGRG6, L5-6_TLE4_SCUBE1, PV_SCUBE3, Oligo-5, VIP_ABI3BP, VIP_CRH, SST_B3GAT2, L5-6_TLE4_HTR2C, ID2_dev, L5-6_THEMIS_NTNG2, SST_CALB1_dev, PV_WFDC2, L4_RORB_LRRK1, VIP_KIRREL3, PV_SULF1, L4_RORB_MET, VIP_ADAMTSL1, PV_SULF1_dev, VIP_CHRM2, Micro_out, LAMP5_CCK, Vas_PDGFRB, SST_BRINP3, L4_RORB_dev-1, L4_RORB_MME, L2-3_CUX2_dev-1, VIP_DPP6, VIP_HS3ST3A1, Vas_CLDN5, VIP_PCDH20, Vas_TBX18, PV_SST, SST_NPY, CCK_SYT6, CCK_SORCS1, PN_dev, SST_CALB1, CCK_RELN, LAMP5_NDNF, ID2_CSMD1, L2-3_CUX2_dev-5, L2-3_CUX2_dev-3, SST_ADGRG6_dev, OPC_dev, L2_CUX2_LAMP5_dev, Astro_SLC1A2_dev, Oligo-7, Astro_dev-4 as ident...
## using all genes as input features...
## max normalizing the matirx...
## screen genes in each cluster according to thresh.2
## calculating parameters...
## Using "RNA" as the assay to plot Heatmap...
## createing out put data...
## Analysing Complete!
out.Star.Sub.2 <- starTracer::searchMarker(snRNA.hpfc.sct,thresh.1 = 0.5,thresh.2 = 0.2,method = "del_MI",num = 3,gene.use = NULL)
## using Astro_GFAP, Oligo-2, Oligo-3, Oligo-1, LAMP5_NOS1, OPC, Oligo-6, L2_CUX2_LAMP5, L5-6_TLE4_SORCS1, Micro, L3_CUX2_PRSS12, L5-6_THEMIS_CNR1, SST_TH, Astro_SLC1A2, Oligo-4, SST_STK32A, Oligo_mat, OPC_MBP, SST_ADGRG6, L5-6_TLE4_SCUBE1, PV_SCUBE3, Oligo-5, VIP_ABI3BP, VIP_CRH, SST_B3GAT2, L5-6_TLE4_HTR2C, ID2_dev, L5-6_THEMIS_NTNG2, SST_CALB1_dev, PV_WFDC2, L4_RORB_LRRK1, VIP_KIRREL3, PV_SULF1, L4_RORB_MET, VIP_ADAMTSL1, PV_SULF1_dev, VIP_CHRM2, Micro_out, LAMP5_CCK, Vas_PDGFRB, SST_BRINP3, L4_RORB_dev-1, L4_RORB_MME, L2-3_CUX2_dev-1, VIP_DPP6, VIP_HS3ST3A1, Vas_CLDN5, VIP_PCDH20, Vas_TBX18, PV_SST, SST_NPY, CCK_SYT6, CCK_SORCS1, PN_dev, SST_CALB1, CCK_RELN, LAMP5_NDNF, ID2_CSMD1, L2-3_CUX2_dev-5, L2-3_CUX2_dev-3, SST_ADGRG6_dev, OPC_dev, L2_CUX2_LAMP5_dev, Astro_SLC1A2_dev, Oligo-7, Astro_dev-4 as ident...
## using all genes as input features...
## max normalizing the matirx...
## screen genes in each cluster according to thresh.2
## calculating parameters...
## Using "RNA" as the assay to plot Heatmap...
## createing out put data...
## Analysing Complete!
dotPlot, as some parameters are changed, we don’t directly use
use_dotplot
functionl.
colors.gradient = c("#018daa","#ffffff","#ed463a")
(p <- DotPlot(
snRNA.hpfc.sct,
assay = "RNA",
features = out.Star.Sub.2$genes.markers,
#cols = c("white", "red"), #replaced with scale color function
col.min = -2.5,
col.max = 2.5,
dot.min = 0,
dot.scale = 2,
idents = NULL,
group.by = NULL,
split.by = NULL,
cluster.idents = FALSE,
scale = TRUE,
scale.by = "radius",
scale.min = NA,
scale.max = NA
) + coord_flip() + scale_colour_gradient2(low = colors.gradient[1],mid = colors.gradient[2], high = colors.gradient[3]) +
ggthemes::theme_few() + theme(axis.text = element_text(size = 8),axis.ticks = element_blank(),axis.text.x = element_text(angle = 45, hjust = 1)))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
Idents(snRNA.hpfc.sct) <- snRNA.hpfc.sct$sub_clust
out.Seurat.Sub <- FindAllMarkers(snRNA.hpfc.sct,logfc.threshold = 0.5,only.pos = T)
marker genes of Seurat
gene.seurat <- subset(out.Seurat.Sub,out.Seurat.Sub$p_val_adj < 0.05)
gene.seurat <- arrange(gene.seurat,cluster,desc(avg_log2FC))$gene
#gene.seurat <- with(gene.seurat,by(gene.seurat[,"gene"],INDICES = cluster,FUN = function(x)head(x,30))) %>% unlist() %>% as.vector() %>% rev()
gene.seurat %>% head()
## [1] "Gfap" "Dpp10" "Tnc" "Aqp4" "Kcnn3" "Sparcl1"
We then remove the duplicated marker genes
frame.seurat <- out.Seurat.Sub[,c("cluster","gene","avg_log2FC")] %>% subset(out.Seurat.Sub$gene %in% gene.seurat)
frame.seurat <- arrange(frame.seurat,gene,desc(avg_log2FC))
frame.seurat <- frame.seurat[!duplicated(frame.seurat$gene),] %>% arrange(cluster,desc(avg_log2FC))
frame.seurat <- frame.seurat %>%
group_by(cluster) %>% #按照cluster列进行分组
top_n(5, wt = avg_log2FC) %>% #按照FC列从大到小排序,选择每组前2行
ungroup()
#this is for plota
frame.seurat.2 <- frame.seurat %>%
group_by(cluster) %>% #按照cluster列进行分组
top_n(2, wt = avg_log2FC) %>% #按照FC列从大到小排序,选择每组前2行
ungroup()
we found 5 genes in almost all the clusters, but for Astro_dev-4, PN_dev and Oligo-7, we failed to identify marker genes in it.
colors.gradient <- c("#13c4cd","#ffffff","#b91192")
(p <- DotPlot(
snRNA.hpfc.sct,
assay = "RNA",
features = rev(frame.seurat.2$gene),
#cols = c("white", "red"), #replaced with scale color function
col.min = -2.5,
col.max = 2.5,
dot.min = 0,
dot.scale = 2,
idents = NULL,
group.by = NULL,
split.by = NULL,
cluster.idents = FALSE,
scale = TRUE,
scale.by = "radius",
scale.min = NA,
scale.max = NA
) + coord_flip() + scale_colour_gradient2(low = colors.gradient[1],mid = colors.gradient[2], high = colors.gradient[3]) +
ggthemes::theme_few() + theme(axis.text = element_text(size = 8),axis.ticks = element_blank(),axis.text.x = element_text(angle = 45, hjust = 1)))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
frame.seurat <- frame.seurat[,c("cluster","gene")]
frame.seurat <- cal_spe(snRNA.hpfc.sct,ident.use = "sub_clust",as.data.frame(frame.seurat))
frame.star <- subset(out.Star.Sub$para_frame,out.Star.Sub$para_frame$gene %in% out.Star.Sub$genes.markers)[,c("max.X","gene")]
colnames(frame.star)[1] <- "cluster"
frame.star <- cal_spe(snRNA.hpfc.sct,ident.use = "sub_clust",frame.star)
frame.star <- subset(frame.star,frame.star$cluster %in% frame.seurat$cluster)
identical(frame.star$cluster,frame.seurat$cluster)
frame.pct.pos <- data.frame(cluster = frame.star$cluster,pct.star = frame.star$pct.pos,pct.seurat = frame.seurat$pct.pos)
#cal seurat.mean
frame.pct.pos$pct.seurat.mean <- rep(aggregate(pct.seurat ~ cluster, data = frame.pct.pos, FUN = mean)$pct.seurat,each = 5)
#divide seurat.mean to get star.adk and seurat.adj
frame.pct.pos$pct.seurat.adj <- frame.pct.pos$pct.seurat/frame.pct.pos$pct.seurat.mean
frame.pct.pos$pct.star.adj <- frame.pct.pos$pct.star/frame.pct.pos$pct.seurat.mean
#cal star.adj.mean to arrange the plot
frame.pct.pos$pct.star.adj.mean <- rep(aggregate(pct.star.adj ~ cluster, data = frame.pct.pos, FUN = mean)$pct.star.adj,each = 5)
#arrange
frame.pct.pos <- arrange(frame.pct.pos,desc(pct.star.adj.mean))
frame.pct.pos$cluster <- factor(frame.pct.pos$cluster,levels = unique(frame.pct.pos$cluster))
p <- c()
foldchange <- c()
for (i in unique(frame.pct.pos$cluster)){
mat <- subset(frame.pct.pos,cluster == i)
t1 <- mat$pct.star
t2 <- mat$pct.seurat
p.i <- t.test(t1,t2)$p.value
foldchange.i <- mean(mat$pct.star)/mean(mat$pct.seurat)
foldchange <- append(foldchange,foldchange.i)
p <- append(p,p.i)
}
frame.pct.pos.stat <- data.frame(cluster = unique(frame.pct.pos$cluster),p = p, foldchange = foldchange)
mat.use <- reshape2::melt(frame.pct.pos[,c(1,5,6)])
## Using cluster as id variables
(p <- ggbarplot(mat.use, x = "cluster", y = "value", add = c("mean_se"),color = "variable",fill = "white",palette = c("#fcdc57","blue"),
position = position_dodge(0.8)) + stat_compare_means(aes(group = variable), label = "p.signif") + theme(axis.text.x = element_text(angle = 45,hjust = 1,size = 3)))