thresh.2
is a parameter of great importance. We have
demonstrated that the marker gene’s specificity and the expression
levels are negatively correlated. Here is the script showing the usage
and the results of thresh.2
.
we use PFC data, and use different thresh.2
to exhibit
the ability of starTracer
Idents of Seurat should are set to be sub_clust
. We
here will use a subset of the PFC data: projection neurons (PN), as the
analysis object.
snRNA_PN <- snRNA.hpfc.sct[,snRNA.hpfc.sct$bio_clust %in% "PN"]
process the object snRNA_PN
snRNA_PN <- FindVariableFeatures(snRNA_PN, nfeatures = 3000)
snRNA_PN <- NormalizeData(snRNA_PN) %>% ScaleData()
snRNA_PN <- RunPCA(snRNA_PN)
find PC numer
num <- FindPCNum(snRNA_PN)
num
RunUMAP
snRNA_PN <- RunUMAP(snRNA_PN,dims = num)
DimPlot of the dataset after processing:
Idents(snRNA_PN) <- snRNA_PN$sub_clust
(p <- DimPlot(snRNA_PN,cols = rev(viridis::mako(length(unique(snRNA_PN$sub_clust))))))
With thresh.2=0
, starTracer will NOT filter out any
gene. The specificity of marker genes we found will reach the highest
level. Setting gene.use to all, starTracer will identify marker genes
from all the genes included, not just the high variable genes.
star.thresh0 <- starTracer::searchMarker(snRNA_PN,thresh.1 = 0.5,thresh.2 = 0,num = 2,gene.use = NULL)
## using L2_CUX2_LAMP5, L5-6_TLE4_SORCS1, L3_CUX2_PRSS12, L5-6_THEMIS_CNR1, L5-6_TLE4_SCUBE1, L5-6_TLE4_HTR2C, L5-6_THEMIS_NTNG2, L4_RORB_LRRK1, L4_RORB_MET, L4_RORB_dev-1, L4_RORB_MME, L2-3_CUX2_dev-1, L2-3_CUX2_dev-5, L2-3_CUX2_dev-3, L2_CUX2_LAMP5_dev as ident...
## using all genes as input features...
## max normalizing the matirx...
## screen genes in each cluster according to thresh.2
## calculating parameters...
## Using "RNA" as the assay to plot Heatmap...
## createing out put data...
## Analysing Complete!
(p <- use_dotplot(snRNA_PN,features = star.thresh0$genes.markers, colors.gradient = c("#018daa","#ffffff","#ed463a")))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
histogram showing the specificity level of the marker genes, we can notice an enrichment in 1 (100%).
frame.star.thresh0 <- subset(star.thresh0$para_frame,star.thresh0$para_frame$gene %in% star.thresh0$genes.markers)[,c("max.X","gene")]
colnames(frame.star.thresh0)[1] <- "cluster"
frame.star.thresh0 <- cal_spe(snRNA_PN,ident.use = "sub_clust",frame.star.thresh0)
(p <- ggplot(data=frame.star.thresh0, aes(x = pct.pos)) +
geom_density(binwidth = 0.1,fill = "#69b3a2",
color = "black", alpha=0.9) + theme_bw() + labs(x="",y=""))
## Warning in geom_density(binwidth = 0.1, fill = "#69b3a2", color = "black", :
## Ignoring unknown parameters: `binwidth`
expression level
expr.thresh0 <- snRNA_PN@assays$RNA@data[star.thresh0$genes.markers,] %>% rowMeans() %>% as.data.frame()
colnames(expr.thresh0) <- "expr"
(p <- ggplot(data=expr.thresh0, aes(x = expr)) +
geom_histogram(fill = "#69b3a2",color = "black",alpha = 0.5) + theme_bw() + labs(x="",y="") + theme(panel.grid = element_blank()))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
star.thresh02 <- starTracer::searchMarker(snRNA_PN,thresh.1 = 0.5,thresh.2 = 0.2,num = 2,gene.use = NULL)
## using L2_CUX2_LAMP5, L5-6_TLE4_SORCS1, L3_CUX2_PRSS12, L5-6_THEMIS_CNR1, L5-6_TLE4_SCUBE1, L5-6_TLE4_HTR2C, L5-6_THEMIS_NTNG2, L4_RORB_LRRK1, L4_RORB_MET, L4_RORB_dev-1, L4_RORB_MME, L2-3_CUX2_dev-1, L2-3_CUX2_dev-5, L2-3_CUX2_dev-3, L2_CUX2_LAMP5_dev as ident...
## using all genes as input features...
## max normalizing the matirx...
## screen genes in each cluster according to thresh.2
## calculating parameters...
## Using "RNA" as the assay to plot Heatmap...
## createing out put data...
## Analysing Complete!
(p <- use_dotplot(snRNA_PN,features = star.thresh02$genes.markers, colors.gradient = c("#018daa","#ffffff","#ed463a")))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
histogram:
frame.star.thresh02 <- subset(star.thresh02$para_frame,star.thresh02$para_frame$gene %in% star.thresh02$genes.markers)[,c("max.X","gene")]
colnames(frame.star.thresh02)[1] <- "cluster"
frame.star.thresh02 <- cal_spe(snRNA_PN,ident.use = "sub_clust",frame.star.thresh02)
(p <- ggplot(data=frame.star.thresh02, aes(x = pct.pos)) +
geom_density(binwidth = 0.1,fill = "#79c890",
color = "black", alpha=0.9) + theme_bw() + labs(x="",y=""))
## Warning in geom_density(binwidth = 0.1, fill = "#79c890", color = "black", :
## Ignoring unknown parameters: `binwidth`
expression level
expr.thresh02 <- snRNA_PN@assays$RNA@data[star.thresh02$genes.markers,] %>% rowMeans() %>% as.data.frame()
colnames(expr.thresh02) <- "expr"
(p <- ggplot(data=expr.thresh02, aes(x = expr)) +
geom_histogram(fill = "#79c890",color = "black",alpha = 0.5) + theme_bw() + labs(x="",y="") + theme(panel.grid = element_blank()))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
star.thresh04 <- starTracer::searchMarker(snRNA_PN,thresh.1 = 0.5,thresh.2 = 0.4,num = 2,gene.use = NULL)
## using L2_CUX2_LAMP5, L5-6_TLE4_SORCS1, L3_CUX2_PRSS12, L5-6_THEMIS_CNR1, L5-6_TLE4_SCUBE1, L5-6_TLE4_HTR2C, L5-6_THEMIS_NTNG2, L4_RORB_LRRK1, L4_RORB_MET, L4_RORB_dev-1, L4_RORB_MME, L2-3_CUX2_dev-1, L2-3_CUX2_dev-5, L2-3_CUX2_dev-3, L2_CUX2_LAMP5_dev as ident...
## using all genes as input features...
## max normalizing the matirx...
## screen genes in each cluster according to thresh.2
## calculating parameters...
## Using "RNA" as the assay to plot Heatmap...
## createing out put data...
## Analysing Complete!
(p <- use_dotplot(snRNA_PN,features = star.thresh04$genes.markers, colors.gradient = c("#018daa","#ffffff","#ed463a")))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
histogram
frame.star.thresh04 <- subset(star.thresh04$para_frame,star.thresh04$para_frame$gene %in% star.thresh04$genes.markers)[,c("max.X","gene")]
colnames(frame.star.thresh04)[1] <- "cluster"
frame.star.thresh04 <- cal_spe(snRNA_PN,ident.use = "sub_clust",frame.star.thresh04)
(p <- ggplot(data=frame.star.thresh04, aes(x = pct.pos)) +
geom_density(binwidth = 0.1,fill = "#eca67d",
color = "black", alpha=0.9) + theme_bw() + labs(x="",y=""))
## Warning in geom_density(binwidth = 0.1, fill = "#eca67d", color = "black", :
## Ignoring unknown parameters: `binwidth`
expression level
expr.thresh04 <- snRNA_PN@assays$RNA@data[star.thresh04$genes.markers,] %>% rowMeans() %>% as.data.frame()
colnames(expr.thresh04) <- "expr"
(p <- ggplot(data=expr.thresh04, aes(x = expr)) +
geom_histogram(fill = "#eca67d",color = "black",alpha = 0.5) + theme_bw() + labs(x="",y="") + theme(panel.grid = element_blank()))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
star.thresh06 <- starTracer::searchMarker(snRNA_PN,thresh.1 = 0.5,thresh.2 = 0.6,num = 2,gene.use = NULL)
## using L2_CUX2_LAMP5, L5-6_TLE4_SORCS1, L3_CUX2_PRSS12, L5-6_THEMIS_CNR1, L5-6_TLE4_SCUBE1, L5-6_TLE4_HTR2C, L5-6_THEMIS_NTNG2, L4_RORB_LRRK1, L4_RORB_MET, L4_RORB_dev-1, L4_RORB_MME, L2-3_CUX2_dev-1, L2-3_CUX2_dev-5, L2-3_CUX2_dev-3, L2_CUX2_LAMP5_dev as ident...
## using all genes as input features...
## max normalizing the matirx...
## screen genes in each cluster according to thresh.2
## calculating parameters...
## Using "RNA" as the assay to plot Heatmap...
## createing out put data...
## Analysing Complete!
(p <- use_dotplot(snRNA_PN,features = star.thresh06$genes.markers, colors.gradient = c("#018daa","#ffffff","#ed463a")))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
histogram
frame.star.thresh06 <- subset(star.thresh06$para_frame,star.thresh06$para_frame$gene %in% star.thresh06$genes.markers)[,c("max.X","gene")]
colnames(frame.star.thresh06)[1] <- "cluster"
frame.star.thresh06 <- cal_spe(snRNA_PN,ident.use = "sub_clust",frame.star.thresh06)
(p <- ggplot(data=frame.star.thresh06, aes(x = pct.pos)) +
geom_density(binwidth = 0.1,fill = "#eb7471",
color = "black", alpha=0.9) + theme_bw() + labs(x="",y=""))
## Warning in geom_density(binwidth = 0.1, fill = "#eb7471", color = "black", :
## Ignoring unknown parameters: `binwidth`
expression level
expr.thresh06 <- snRNA_PN@assays$RNA@data[star.thresh06$genes.markers,] %>% rowMeans() %>% as.data.frame()
colnames(expr.thresh06) <- "expr"
(p <- ggplot(data=expr.thresh06, aes(x = expr)) +
geom_histogram(fill = "#eb7471",color = "black",alpha = 0.5) + theme_bw() + labs(x="",y="") + theme(panel.grid = element_blank()))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
star.thresh08 <- starTracer::searchMarker(snRNA_PN,thresh.1 = 0.5,thresh.2 = 0.8,num = 2,gene.use = NULL)
## using L2_CUX2_LAMP5, L5-6_TLE4_SORCS1, L3_CUX2_PRSS12, L5-6_THEMIS_CNR1, L5-6_TLE4_SCUBE1, L5-6_TLE4_HTR2C, L5-6_THEMIS_NTNG2, L4_RORB_LRRK1, L4_RORB_MET, L4_RORB_dev-1, L4_RORB_MME, L2-3_CUX2_dev-1, L2-3_CUX2_dev-5, L2-3_CUX2_dev-3, L2_CUX2_LAMP5_dev as ident...
## using all genes as input features...
## max normalizing the matirx...
## screen genes in each cluster according to thresh.2
## calculating parameters...
## Using "RNA" as the assay to plot Heatmap...
## createing out put data...
## Analysing Complete!
(p <- use_dotplot(snRNA_PN,features = star.thresh08$genes.markers, colors.gradient = c("#018daa","#ffffff","#ed463a")))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
we next analyze the gene’s mean expression level and the composition of the gene type
histogram
frame.star.thresh08 <- subset(star.thresh08$para_frame,star.thresh08$para_frame$gene %in% star.thresh08$genes.markers)[,c("max.X","gene")]
colnames(frame.star.thresh08)[1] <- "cluster"
frame.star.thresh08 <- cal_spe(snRNA_PN,ident.use = "sub_clust",frame.star.thresh08)
(p <- ggplot(data=frame.star.thresh08, aes(x = pct.pos)) +
geom_density(binwidth = 0.1,fill = "#ae1978",
color = "black", alpha=0.9) + theme_bw() + labs(x="",y=""))
## Warning in geom_density(binwidth = 0.1, fill = "#ae1978", color = "black", :
## Ignoring unknown parameters: `binwidth`
expression level
expr.thresh08 <- snRNA_PN@assays$RNA@data[star.thresh08$genes.markers,] %>% rowMeans() %>% as.data.frame()
colnames(expr.thresh08) <- "expr"
(p <- ggplot(data=expr.thresh08, aes(x = expr)) +
geom_histogram(fill = "#ae1978",color = "black",alpha = 0.5) + theme_bw() + labs(x="",y="") + theme(panel.grid = element_blank()))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
The distribution of the specificity in all samples.
mat.expre.spe <- data.frame(thresh0 = frame.star.thresh0$pct.pos,
thresh02 = frame.star.thresh02$pct.pos,
thresh04 = frame.star.thresh04$pct.pos,
thresh06 = frame.star.thresh06$pct.pos,
thresh08 = frame.star.thresh08$pct.pos)
colnames(mat.expre.spe) <- c("thresh0","thresh02","thresh04","thresh06","thresh08")
mat.expre.spe <- reshape2::melt(mat.expre.spe)
## No id variables; using all as measure variables
head(mat.expre.spe)
## variable value
## 1 thresh0 1
## 2 thresh0 1
## 3 thresh0 1
## 4 thresh0 1
## 5 thresh0 1
## 6 thresh0 1
(p <- ggplot(data = mat.expre.spe) +
geom_density(aes(x = value, fill = variable, color = variable), alpha = 0.4) +
theme_bw() + labs(x="",y="") + theme(panel.grid = element_blank()) +
scale_fill_manual(values = c("#69b3a2","#79c890","#eca67d","#eb7471","#ae1978")) +
scale_color_manual(values = c("#69b3a2","#79c890","#eca67d","#eb7471","#ae1978")))
mat.expre.thresh <- data.frame(thresh0 = expr.thresh0,
thresh02 = expr.thresh02,
thresh04 = expr.thresh04,
thresh06 = expr.thresh06,
thresh08 = expr.thresh08)
colnames(mat.expre.thresh) <- c("thresh0","thresh02","thresh04","thresh06","thresh08")
mat.expre.thresh <- reshape2::melt(mat.expre.thresh)
## No id variables; using all as measure variables
head(mat.expre.thresh)
## variable value
## 1 thresh0 3.213416e-04
## 2 thresh0 1.959860e-04
## 3 thresh0 8.632410e-04
## 4 thresh0 9.931124e-05
## 5 thresh0 6.863646e-04
## 6 thresh0 4.294552e-04
mat.pct.pos.thresh <- data.frame(thresh0 = frame.star.thresh0$pct.pos,
thresh02 = frame.star.thresh02$pct.pos,
thresh04 = frame.star.thresh04$pct.pos,
thresh06 = frame.star.thresh06$pct.pos,
thresh08 = frame.star.thresh08$pct.pos)
colnames(mat.pct.pos.thresh) <- c("thresh0","thresh02","thresh04","thresh06","thresh08")
mat.pct.pos.thresh <- reshape2::melt(mat.pct.pos.thresh)
## No id variables; using all as measure variables
mat.pct.pos.thresh <- Rmisc::summarySE(mat.pct.pos.thresh, measurevar = "value", groupvar = c("variable"))
plot
(p <- ggplot(data = mat.pct.pos.thresh) +
geom_errorbar(aes(x = variable,y = value, ymin = value - se, ymax = value + se, color = variable), width = 0.02) +
geom_line(aes(x = variable,y = value, group = 1),color = "black",lwd = 0.5) +
geom_point(aes(x = variable,y = value, color = variable)) +
scale_color_manual(values = c("#69b3a2","#79c890","#eca67d","#eb7471","#ae1978")) +
theme_bw() + ylab("percent.positive"))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
(p <- ggplot(data=mat.expre.thresh) +
geom_boxplot(aes(x = variable,y = value,fill = variable, color = variable),alpha = 0.5) + theme_bw() + labs(x="",y="") + theme(panel.grid = element_blank()) +
scale_fill_manual(values = c("#69b3a2","#79c890","#eca67d","#eb7471","#ae1978")) +
scale_color_manual(values = c("#69b3a2","#79c890","#eca67d","#eb7471","#ae1978")))