thresh.2 is a parameter of great importance. We have demonstrated that the marker gene’s specificity and the expression levels are negatively correlated. Here is the script showing the usage and the results of thresh.2 .

1 load and Process Brain Data

we use PFC data, and use different thresh.2 to exhibit the ability of starTracer

Idents of Seurat should are set to be sub_clust . We here will use a subset of the PFC data: projection neurons (PN), as the analysis object.

snRNA_PN <- snRNA.hpfc.sct[,snRNA.hpfc.sct$bio_clust %in% "PN"]

process the object snRNA_PN

snRNA_PN <- FindVariableFeatures(snRNA_PN, nfeatures = 3000)
snRNA_PN <- NormalizeData(snRNA_PN) %>% ScaleData()
snRNA_PN <- RunPCA(snRNA_PN)

find PC numer

num <- FindPCNum(snRNA_PN)
num

RunUMAP

snRNA_PN <- RunUMAP(snRNA_PN,dims = num)

DimPlot of the dataset after processing:

Idents(snRNA_PN) <- snRNA_PN$sub_clust
(p <- DimPlot(snRNA_PN,cols = rev(viridis::mako(length(unique(snRNA_PN$sub_clust))))))

2 different Threshold of thresh.2

2.1 thresh.2=0, gene.use=all

With thresh.2=0, starTracer will NOT filter out any gene. The specificity of marker genes we found will reach the highest level. Setting gene.use to all, starTracer will identify marker genes from all the genes included, not just the high variable genes.

star.thresh0 <- starTracer::searchMarker(snRNA_PN,thresh.1 = 0.5,thresh.2 = 0,num = 2,gene.use = NULL)

## using L2_CUX2_LAMP5, L5-6_TLE4_SORCS1, L3_CUX2_PRSS12, L5-6_THEMIS_CNR1, L5-6_TLE4_SCUBE1, L5-6_TLE4_HTR2C, L5-6_THEMIS_NTNG2, L4_RORB_LRRK1, L4_RORB_MET, L4_RORB_dev-1, L4_RORB_MME, L2-3_CUX2_dev-1, L2-3_CUX2_dev-5, L2-3_CUX2_dev-3, L2_CUX2_LAMP5_dev as ident...

## using all genes as input features...

## max normalizing the matirx...

## screen genes in each cluster according to thresh.2

## calculating parameters...

## Using "RNA" as the assay to plot Heatmap...

## createing out put data...

## Analysing Complete!

(p <- use_dotplot(snRNA_PN,features = star.thresh0$genes.markers, colors.gradient = c("#018daa","#ffffff","#ed463a")))

## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

histogram showing the specificity level of the marker genes, we can notice an enrichment in 1 (100%).

frame.star.thresh0 <- subset(star.thresh0$para_frame,star.thresh0$para_frame$gene %in% star.thresh0$genes.markers)[,c("max.X","gene")]
colnames(frame.star.thresh0)[1] <- "cluster"
frame.star.thresh0 <- cal_spe(snRNA_PN,ident.use = "sub_clust",frame.star.thresh0)
(p <- ggplot(data=frame.star.thresh0, aes(x = pct.pos)) + 
  geom_density(binwidth = 0.1,fill = "#69b3a2", 
                 color = "black", alpha=0.9) + theme_bw() + labs(x="",y=""))

## Warning in geom_density(binwidth = 0.1, fill = "#69b3a2", color = "black", :
## Ignoring unknown parameters: `binwidth`

expression level

expr.thresh0 <- snRNA_PN@assays$RNA@data[star.thresh0$genes.markers,] %>% rowMeans() %>% as.data.frame()
colnames(expr.thresh0) <- "expr"
(p <- ggplot(data=expr.thresh0, aes(x = expr)) + 
  geom_histogram(fill = "#69b3a2",color = "black",alpha = 0.5) + theme_bw() + labs(x="",y="") + theme(panel.grid = element_blank()))

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

2.2 thresh2 = 0.2, gene.use = all

star.thresh02 <- starTracer::searchMarker(snRNA_PN,thresh.1 = 0.5,thresh.2 = 0.2,num = 2,gene.use = NULL)

## using L2_CUX2_LAMP5, L5-6_TLE4_SORCS1, L3_CUX2_PRSS12, L5-6_THEMIS_CNR1, L5-6_TLE4_SCUBE1, L5-6_TLE4_HTR2C, L5-6_THEMIS_NTNG2, L4_RORB_LRRK1, L4_RORB_MET, L4_RORB_dev-1, L4_RORB_MME, L2-3_CUX2_dev-1, L2-3_CUX2_dev-5, L2-3_CUX2_dev-3, L2_CUX2_LAMP5_dev as ident...

## using all genes as input features...

## max normalizing the matirx...

## screen genes in each cluster according to thresh.2

## calculating parameters...

## Using "RNA" as the assay to plot Heatmap...

## createing out put data...

## Analysing Complete!

(p <- use_dotplot(snRNA_PN,features = star.thresh02$genes.markers, colors.gradient = c("#018daa","#ffffff","#ed463a")))

## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

histogram:

frame.star.thresh02 <- subset(star.thresh02$para_frame,star.thresh02$para_frame$gene %in% star.thresh02$genes.markers)[,c("max.X","gene")]
colnames(frame.star.thresh02)[1] <- "cluster"
frame.star.thresh02 <- cal_spe(snRNA_PN,ident.use = "sub_clust",frame.star.thresh02)
(p <- ggplot(data=frame.star.thresh02, aes(x = pct.pos)) + 
  geom_density(binwidth = 0.1,fill = "#79c890", 
                 color = "black", alpha=0.9) + theme_bw() + labs(x="",y=""))

## Warning in geom_density(binwidth = 0.1, fill = "#79c890", color = "black", :
## Ignoring unknown parameters: `binwidth`

expression level

expr.thresh02 <- snRNA_PN@assays$RNA@data[star.thresh02$genes.markers,] %>% rowMeans() %>% as.data.frame()
colnames(expr.thresh02) <- "expr"
(p <- ggplot(data=expr.thresh02, aes(x = expr)) + 
  geom_histogram(fill = "#79c890",color = "black",alpha = 0.5) + theme_bw() + labs(x="",y="") + theme(panel.grid = element_blank()))

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

2.3 thresh2 = 0.4, gene.use = all

star.thresh04 <- starTracer::searchMarker(snRNA_PN,thresh.1 = 0.5,thresh.2 = 0.4,num = 2,gene.use = NULL)

## using L2_CUX2_LAMP5, L5-6_TLE4_SORCS1, L3_CUX2_PRSS12, L5-6_THEMIS_CNR1, L5-6_TLE4_SCUBE1, L5-6_TLE4_HTR2C, L5-6_THEMIS_NTNG2, L4_RORB_LRRK1, L4_RORB_MET, L4_RORB_dev-1, L4_RORB_MME, L2-3_CUX2_dev-1, L2-3_CUX2_dev-5, L2-3_CUX2_dev-3, L2_CUX2_LAMP5_dev as ident...

## using all genes as input features...

## max normalizing the matirx...

## screen genes in each cluster according to thresh.2

## calculating parameters...

## Using "RNA" as the assay to plot Heatmap...

## createing out put data...

## Analysing Complete!

(p <- use_dotplot(snRNA_PN,features = star.thresh04$genes.markers, colors.gradient = c("#018daa","#ffffff","#ed463a")))

## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

histogram

frame.star.thresh04 <- subset(star.thresh04$para_frame,star.thresh04$para_frame$gene %in% star.thresh04$genes.markers)[,c("max.X","gene")]
colnames(frame.star.thresh04)[1] <- "cluster"
frame.star.thresh04 <- cal_spe(snRNA_PN,ident.use = "sub_clust",frame.star.thresh04)
(p <- ggplot(data=frame.star.thresh04, aes(x = pct.pos)) + 
  geom_density(binwidth = 0.1,fill = "#eca67d", 
                 color = "black", alpha=0.9) + theme_bw() + labs(x="",y=""))

## Warning in geom_density(binwidth = 0.1, fill = "#eca67d", color = "black", :
## Ignoring unknown parameters: `binwidth`

expression level

expr.thresh04 <- snRNA_PN@assays$RNA@data[star.thresh04$genes.markers,] %>% rowMeans() %>% as.data.frame()
colnames(expr.thresh04) <- "expr"
(p <- ggplot(data=expr.thresh04, aes(x = expr)) + 
  geom_histogram(fill = "#eca67d",color = "black",alpha = 0.5) + theme_bw() + labs(x="",y="") + theme(panel.grid = element_blank()))

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

2.4 thresh2 = 0.6, gene.use = all

star.thresh06 <- starTracer::searchMarker(snRNA_PN,thresh.1 = 0.5,thresh.2 = 0.6,num = 2,gene.use = NULL)

## using L2_CUX2_LAMP5, L5-6_TLE4_SORCS1, L3_CUX2_PRSS12, L5-6_THEMIS_CNR1, L5-6_TLE4_SCUBE1, L5-6_TLE4_HTR2C, L5-6_THEMIS_NTNG2, L4_RORB_LRRK1, L4_RORB_MET, L4_RORB_dev-1, L4_RORB_MME, L2-3_CUX2_dev-1, L2-3_CUX2_dev-5, L2-3_CUX2_dev-3, L2_CUX2_LAMP5_dev as ident...

## using all genes as input features...

## max normalizing the matirx...

## screen genes in each cluster according to thresh.2

## calculating parameters...

## Using "RNA" as the assay to plot Heatmap...

## createing out put data...

## Analysing Complete!

(p <- use_dotplot(snRNA_PN,features = star.thresh06$genes.markers, colors.gradient = c("#018daa","#ffffff","#ed463a")))

## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

histogram

frame.star.thresh06 <- subset(star.thresh06$para_frame,star.thresh06$para_frame$gene %in% star.thresh06$genes.markers)[,c("max.X","gene")]
colnames(frame.star.thresh06)[1] <- "cluster"
frame.star.thresh06 <- cal_spe(snRNA_PN,ident.use = "sub_clust",frame.star.thresh06)
(p <- ggplot(data=frame.star.thresh06, aes(x = pct.pos)) + 
  geom_density(binwidth = 0.1,fill = "#eb7471", 
                 color = "black", alpha=0.9) + theme_bw() + labs(x="",y=""))

## Warning in geom_density(binwidth = 0.1, fill = "#eb7471", color = "black", :
## Ignoring unknown parameters: `binwidth`

expression level

expr.thresh06 <- snRNA_PN@assays$RNA@data[star.thresh06$genes.markers,] %>% rowMeans() %>% as.data.frame()
colnames(expr.thresh06) <- "expr"
(p <- ggplot(data=expr.thresh06, aes(x = expr)) + 
  geom_histogram(fill = "#eb7471",color = "black",alpha = 0.5) + theme_bw() + labs(x="",y="") + theme(panel.grid = element_blank()))

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

2.5 thresh2 = 0.8, gene.use = all

star.thresh08 <- starTracer::searchMarker(snRNA_PN,thresh.1 = 0.5,thresh.2 = 0.8,num = 2,gene.use = NULL)

## using L2_CUX2_LAMP5, L5-6_TLE4_SORCS1, L3_CUX2_PRSS12, L5-6_THEMIS_CNR1, L5-6_TLE4_SCUBE1, L5-6_TLE4_HTR2C, L5-6_THEMIS_NTNG2, L4_RORB_LRRK1, L4_RORB_MET, L4_RORB_dev-1, L4_RORB_MME, L2-3_CUX2_dev-1, L2-3_CUX2_dev-5, L2-3_CUX2_dev-3, L2_CUX2_LAMP5_dev as ident...

## using all genes as input features...

## max normalizing the matirx...

## screen genes in each cluster according to thresh.2

## calculating parameters...

## Using "RNA" as the assay to plot Heatmap...

## createing out put data...

## Analysing Complete!

(p <- use_dotplot(snRNA_PN,features = star.thresh08$genes.markers, colors.gradient = c("#018daa","#ffffff","#ed463a")))

## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

we next analyze the gene’s mean expression level and the composition of the gene type

histogram

frame.star.thresh08 <- subset(star.thresh08$para_frame,star.thresh08$para_frame$gene %in% star.thresh08$genes.markers)[,c("max.X","gene")]
colnames(frame.star.thresh08)[1] <- "cluster"
frame.star.thresh08 <- cal_spe(snRNA_PN,ident.use = "sub_clust",frame.star.thresh08)
(p <- ggplot(data=frame.star.thresh08, aes(x = pct.pos)) + 
  geom_density(binwidth = 0.1,fill = "#ae1978", 
                 color = "black", alpha=0.9) + theme_bw() + labs(x="",y=""))

## Warning in geom_density(binwidth = 0.1, fill = "#ae1978", color = "black", :
## Ignoring unknown parameters: `binwidth`

expression level

expr.thresh08 <- snRNA_PN@assays$RNA@data[star.thresh08$genes.markers,] %>% rowMeans() %>% as.data.frame()
colnames(expr.thresh08) <- "expr"
(p <- ggplot(data=expr.thresh08, aes(x = expr)) + 
  geom_histogram(fill = "#ae1978",color = "black",alpha = 0.5) + theme_bw() + labs(x="",y="") + theme(panel.grid = element_blank()))

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

3 Put all the density plot together

3.1 Specificity: Density Plot

The distribution of the specificity in all samples.

mat.expre.spe <- data.frame(thresh0 = frame.star.thresh0$pct.pos,
                               thresh02 = frame.star.thresh02$pct.pos,
                               thresh04 = frame.star.thresh04$pct.pos,
                               thresh06 = frame.star.thresh06$pct.pos,
                               thresh08 = frame.star.thresh08$pct.pos)
colnames(mat.expre.spe) <- c("thresh0","thresh02","thresh04","thresh06","thresh08")
mat.expre.spe <- reshape2::melt(mat.expre.spe)

## No id variables; using all as measure variables

head(mat.expre.spe)

##   variable value
## 1  thresh0     1
## 2  thresh0     1
## 3  thresh0     1
## 4  thresh0     1
## 5  thresh0     1
## 6  thresh0     1

(p <- ggplot(data = mat.expre.spe) + 
  geom_density(aes(x = value, fill = variable, color = variable), alpha = 0.4) + 
  theme_bw() + labs(x="",y="") + theme(panel.grid = element_blank()) + 
  scale_fill_manual(values = c("#69b3a2","#79c890","#eca67d","#eb7471","#ae1978")) +
  scale_color_manual(values = c("#69b3a2","#79c890","#eca67d","#eb7471","#ae1978")))

mat.expre.thresh <- data.frame(thresh0 = expr.thresh0,
                               thresh02 = expr.thresh02,
                               thresh04 = expr.thresh04,
                               thresh06 = expr.thresh06,
                               thresh08 = expr.thresh08)
colnames(mat.expre.thresh) <- c("thresh0","thresh02","thresh04","thresh06","thresh08")
mat.expre.thresh <- reshape2::melt(mat.expre.thresh)

## No id variables; using all as measure variables

head(mat.expre.thresh)

##   variable        value
## 1  thresh0 3.213416e-04
## 2  thresh0 1.959860e-04
## 3  thresh0 8.632410e-04
## 4  thresh0 9.931124e-05
## 5  thresh0 6.863646e-04
## 6  thresh0 4.294552e-04

3.2 Specificity: Line Plot

mat.pct.pos.thresh <- data.frame(thresh0 = frame.star.thresh0$pct.pos,
                                 thresh02 = frame.star.thresh02$pct.pos,
                                 thresh04 = frame.star.thresh04$pct.pos,
                                 thresh06 = frame.star.thresh06$pct.pos,
                                 thresh08 = frame.star.thresh08$pct.pos)
colnames(mat.pct.pos.thresh) <- c("thresh0","thresh02","thresh04","thresh06","thresh08")
mat.pct.pos.thresh <- reshape2::melt(mat.pct.pos.thresh)

## No id variables; using all as measure variables

mat.pct.pos.thresh <- Rmisc::summarySE(mat.pct.pos.thresh, measurevar = "value", groupvar = c("variable"))

plot

(p <- ggplot(data = mat.pct.pos.thresh) + 
  geom_errorbar(aes(x = variable,y = value, ymin = value - se, ymax = value + se, color = variable), width = 0.02) + 
  geom_line(aes(x = variable,y = value, group = 1),color = "black",lwd = 0.5) +
  geom_point(aes(x = variable,y = value, color = variable)) + 
  scale_color_manual(values = c("#69b3a2","#79c890","#eca67d","#eb7471","#ae1978")) + 
  theme_bw() + ylab("percent.positive"))

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.

3.3 Mean Expression Level

(p <- ggplot(data=mat.expre.thresh) + 
  geom_boxplot(aes(x = variable,y = value,fill = variable, color = variable),alpha = 0.5) + theme_bw() + labs(x="",y="") + theme(panel.grid = element_blank()) + 
  scale_fill_manual(values = c("#69b3a2","#79c890","#eca67d","#eb7471","#ae1978")) +
  scale_color_manual(values = c("#69b3a2","#79c890","#eca67d","#eb7471","#ae1978")))

Different_threshold.2

Feiyang ZHANG

2023-04-06

1 load and Process Brain Data

2 different Threshold of thresh.2

2.1 thresh.2=0, gene.use=all

2.2 thresh2 = 0.2, gene.use = all

2.3 thresh2 = 0.4, gene.use = all

2.4 thresh2 = 0.6, gene.use = all

2.5 thresh2 = 0.8, gene.use = all

3 Put all the density plot together

3.1 Specificity: Density Plot

3.2 Specificity: Line Plot

3.3 Mean Expression Level