To test the regidity of starTracer and the ability to find markers from rare cell types, we will generate a series of data from the huamn PFC snRNA-seq data. Given the fact that starTracer has a good performance especially on finding the markers of the inhibotory neurons, we will gererate subset objects from 100% to 10%, to munually generate “rare” subtypes with knowing the groud-truth of the marker genes (markers found at 100% level). # Ground truth We calculate the top 50 marker genes as the ground truth.
ground_truth <- starTracer::searchMarker(snRNA.hpfc.sct,thresh.1 = 0.5,thresh.2 = 0.3,method = "del_MI",num = 50)
ground_truth$genes.markers #800 genes in total
pct <- seq(1,0.1,-0.1)
cells.all <- colnames(snRNA.hpfc.sct)
inh.neuron <- c("SST","PV_SCUBE3","VIP","ID2","PV")
num.SST <- 1001;num.PV_SCUBE3 <- 137;num.VIP <- 820;num.ID2 <- 689;num.PV <- 558
num.all <- nrow(subset(snRNA.hpfc.sct@meta.data,snRNA.hpfc.sct@meta.data$major_clust %in% inh.neuron))
cells.inhi <- rownames(subset(snRNA.hpfc.sct@meta.data,snRNA.hpfc.sct@meta.data$major_clust %in% inh.neuron))
cells.SST <- rownames(subset(snRNA.hpfc.sct@meta.data,snRNA.hpfc.sct@meta.data$major_clust %in% "SST"))
cells.PV_SCUBE3 <- rownames(subset(snRNA.hpfc.sct@meta.data,snRNA.hpfc.sct@meta.data$major_clust %in% "PV_SCUBE3"))
cells.VIP <- rownames(subset(snRNA.hpfc.sct@meta.data,snRNA.hpfc.sct@meta.data$major_clust %in% "VIP"))
cells.ID2 <- rownames(subset(snRNA.hpfc.sct@meta.data,snRNA.hpfc.sct@meta.data$major_clust %in% "ID2"))
cells.PV <- rownames(subset(snRNA.hpfc.sct@meta.data,snRNA.hpfc.sct@meta.data$major_clust %in% "PV"))
cells.excl <- setdiff(colnames(snRNA.hpfc.sct),cells.inhi)
markers.rare <- list()
for (i in 1:10) {
pct.i <- pct[i]
num.SST.i <- num.SST*pct.i;num.PV_SCUBE3.i <- num.PV_SCUBE3*pct.i;num.VIP.i <- num.VIP*pct.i;num.ID2.i <- num.ID2*pct.i;num.PV.i <- num.PV*pct.i
cells.SST.i <- sample(cells.SST,num.SST.i)
cells.PV_SCUBE3.i <- sample(cells.PV_SCUBE3,num.PV_SCUBE3.i)
cells.VIP.i <- sample(cells.VIP,num.VIP.i)
cells.ID2.i <- sample(cells.ID2,num.ID2.i)
cells.PV.i <- sample(cells.PV,num.PV.i)
cells.keep <- c(cells.excl,cells.SST.i,cells.PV_SCUBE3.i,cells.VIP.i,cells.ID2.i,cells.PV.i)
obj.i <- snRNA.hpfc.sct[,cells.keep]
Idents(obj.i) <- obj.i$major_clust
markers.i <- starTracer::searchMarker(obj.i,thresh.1 = 0.5,thresh.2 = 0.3,method = "del_MI",num = 50)
markers.rare[[i]] <- markers.i
}
markers.rare.frame <- lapply(markers.rare,function(x){
frame <- subset(x$para_frame,x$para_frame$gene %in% x$genes.markers)
frame <- subset(frame,frame$max.X %in% inh.neuron)[,c("gene","max.X")]
return(frame)
})
markers.pct <- purrr::reduce(markers.rare.frame,cbind)
markers.pct <- markers.pct[,seq(1,20,2)]
colnames(markers.pct) <- seq(1,0.1,-0.1)
markers.pct$cluster <- markers.rare.frame[[1]]$max.X
markers.pct.top <- markers.pct %>% group_by(cluster) %>% slice_head(n = 5) %>% ungroup()
markers.pct.top <- as.data.frame(markers.pct.top)
write.csv(markers.pct.top,file = "./out/rev_rare/markers.pct.top.csv")
top 50 intersection
intersected.num <- matrix(data = NA,nrow = 5,ncol = 10) %>% as.data.frame()
rownames(intersected.num) <- unique(markers.pct$cluster)
colnames(intersected.num) <- paste0("pct.",seq(1,0.1,-0.1))
for (i in 1:10) {
for (j in unique(markers.pct$cluster)) {
mat <- subset(markers.pct,markers.pct$cluster %in% j)
intersected.num[j,i] <- intersect(mat$`1`,mat[,i]) %>% length()
}
}
intersected.num$cluster <- rownames(intersected.num)
mat <- reshape2::melt(intersected.num,id = "cluster")
p <- ggplot(data = mat) + geom_line(aes(x = variable, y = value, group = cluster, color = cluster)) + scale_color_manual(values = viridis::magma(6)) +
ylim(c(0,50)) + theme_bw()
ggsave("./out/rev_rare/line.pdf",p,width = 5,height = 5)
pct.i <- pct[10]
num.SST.i <- num.SST*pct.i;num.PV_SCUBE3.i <- num.PV_SCUBE3*pct.i;num.VIP.i <- num.VIP*pct.i;num.ID2.i <- num.ID2*pct.i;num.PV.i <- num.PV*pct.i
cells.SST.i <- sample(cells.SST,num.SST.i)
cells.PV_SCUBE3.i <- sample(cells.PV_SCUBE3,num.PV_SCUBE3.i)
cells.VIP.i <- sample(cells.VIP,num.VIP.i)
cells.ID2.i <- sample(cells.ID2,num.ID2.i)
cells.PV.i <- sample(cells.PV,num.PV.i)
cells.keep <- c(cells.SST.i,cells.PV_SCUBE3.i,cells.VIP.i,cells.ID2.i,cells.PV.i)
obj.i <- snRNA.hpfc.sct[,cells.keep]
p <- use_dotplot(obj.i,features = rev(markers.rare.frame[[10]]$gene), colors.gradient = c("#018daa","#ffffff","#ed463a")) +
theme(axis.text.y = element_text(size = 4))
ggsave("./out/rev_rare/dot.pdf",p,width = 5,height = 15)
top 5 intersection
intersected.num <- matrix(data = NA,nrow = 5,ncol = 10) %>% as.data.frame()
rownames(intersected.num) <- unique(markers.pct.top$cluster)
colnames(intersected.num) <- paste0("pct.",seq(1,0.1,-0.1))
for (i in 1:10) {
for (j in unique(markers.pct.top$cluster)) {
mat <- subset(markers.pct.top,markers.pct.top$cluster %in% j)
intersected.num[j,i] <- intersect(mat$`1`,mat[,i]) %>% length()
}
}
intersected.num$cluster <- rownames(intersected.num)
mat <- reshape2::melt(intersected.num,id = "cluster")
p <- ggplot(data = mat) + geom_line(aes(x = variable, y = value, group = cluster, color = cluster)) + scale_color_manual(values = viridis::magma(6)) +
ylim(c(0,6)) + theme_bw()
ggsave("./out/rev_rare/line_top5.pdf",p,width = 5,height = 5)
pct.i <- pct[10]
num.SST.i <- num.SST*pct.i;num.PV_SCUBE3.i <- num.PV_SCUBE3*pct.i;num.VIP.i <- num.VIP*pct.i;num.ID2.i <- num.ID2*pct.i;num.PV.i <- num.PV*pct.i
cells.SST.i <- sample(cells.SST,num.SST.i)
cells.PV_SCUBE3.i <- sample(cells.PV_SCUBE3,num.PV_SCUBE3.i)
cells.VIP.i <- sample(cells.VIP,num.VIP.i)
cells.ID2.i <- sample(cells.ID2,num.ID2.i)
cells.PV.i <- sample(cells.PV,num.PV.i)
cells.keep <- c(cells.SST.i,cells.PV_SCUBE3.i,cells.VIP.i,cells.ID2.i,cells.PV.i)
obj.i <- snRNA.hpfc.sct[,cells.keep]
p <- use_dotplot(obj.i,features = rev(markers.rare.frame[[10]]$gene), colors.gradient = c("#018daa","#ffffff","#ed463a")) +
theme(axis.text.y = element_text(size = 4))
ggsave("./out/rev_rare/dot.pdf",p,width = 5,height = 15)
colors.PFC <- c(SST = "#DE4968FF",
PV_SCUBE3 = "#8C2981FF",
VIP = "#FE9F6DFF",
ID2 = "#000004FF",
PV = "#3B0F70FF",
Astro = "#bdbdbd",
Oligo = "#bdbdbd",
LAMP5_NOS1 = "#bdbdbd",
OPC = "#bdbdbd",
`L2-3_CUX2` = "#bdbdbd",
`L5-6_TLE4` = "#bdbdbd",
`L5-6_THEMIS` = "#bdbdbd",
`L4_RORB` = "#bdbdbd",
`Vas` = "#bdbdbd",
`PN_dev` = "#bdbdbd")
(p <- DimPlot(obj.i,group.by = "major_clust") + scale_color_manual(values = colors.PFC) + theme_void())
ggsave("./out/rev_rare/Dimplot.0.1.png",p,width = 5,height = 4)
(p <- DimPlot(snRNA.hpfc.sct,group.by = "major_clust")+ scale_color_manual(values = colors.PFC)+ theme_void())
ggsave("./out/rev_rare/Dimplot.all.png",p,width = 5,height = 4)