Alelle specific gene expression analysis for single cell RNA sequencing

This vignette uses gene and single nucleotide variant (SNV) expression files generated from the Annotate Various Features for Alignment.

0. Perform cell clustering with Seurat

We begin by performing cell clustering based on gene expression, which is identical to the first step outlined in the Alternative splicing analysis for scRNA-seq. If you’re continuing from the previous vignette, you are free to skip this section.

require(Yano)

Loading required package: Yano

── Attaching packages ────────────────────────────────────────────── Yano 1.2 ──
✔ dplyr   1.1.4     ✔ Seurat  5.3.0
✔ ggplot2 3.5.2

exp <- ReadPISA("./exp/")

obj <- CreateSeuratObject(exp, min.features = 1000, min.cells = 10)
obj[["percent.mt"]] <- PercentageFeatureSet(obj, pattern = "^MT-")
obj <- subset(obj, nFeature_RNA < 9000 & percent.mt < 20)

# Downsampling to 2000 cells for fast testing
obj <- obj[, sample(colnames(obj),2000)]

# To improve visualization, we reduced the resolution when identifying cell clusters.
obj <- NormalizeData(obj) %>% FindVariableFeatures() %>% ScaleData() %>%  RunPCA(verbose=FALSE) %>% FindNeighbors(dims = 1:10, verbose=FALSE) %>% FindClusters(resolution = 0.1, verbose=FALSE) %>% RunUMAP(dims=1:10, verbose=FALSE)

Normalizing layer: counts
Finding variable features for layer counts
Centering and scaling data matrix

DimPlot(obj, label=TRUE, label.size = 5, label.box = TRUE)

1. Perform allele-specific gene expression analysis by using SNV anchors

In this vignette, when a genetic variant is detected, all possible alleles at that location, including both alternative alleles and the reference allele, are annotated and counted. Therefore, genetic variants refer to both alternative alleles and the reference allele in this context.

var <- ReadPISA("./var")
obj[['var']] <- CreateAssayObject(var[,colnames(obj)], min.cells = 10)
# Switch working assay to 'var'
DefaultAssay(obj) <- "var"

obj <- NormalizeData(obj)

Warning: The `slot` argument of `SetAssayData()` is deprecated as of SeuratObject 5.0.0.
ℹ Please use the `layer` argument instead.
ℹ The deprecated feature was likely used in the Seurat package.
  Please report the issue at <https://github.com/satijalab/seurat/issues>.

Warning: The `slot` argument of `GetAssayData()` is deprecated as of SeuratObject 5.0.0.
ℹ Please use the `layer` argument instead.
ℹ The deprecated feature was likely used in the Seurat package.
  Please report the issue at <https://github.com/satijalab/seurat/issues>.

# you can skip to read GTF again if you already load it
gtf <- gtf2db("./gencode.v44.annotation.gtf.gz")

[2025-06-19 18:09:11] GTF loading..
[2025-06-19 18:09:32] Load 62700 genes.

# At this moment the meta infomation for genetic variant assay is still empty
obj[['var']][[]] %>% head

data frame with 0 columns and 6 rows

obj <- annoVAR(obj, gtf = gtf)

# After annotation, the locations are parsed from the feature name, and overlapped gene are also annotated.
obj[['var']][[]] %>% head %>% knitr::kable()

	chr	start	ref	alt	strand	locus	gene_name	type
chr1:631862G>A/+	chr1	631862	G	A	+	chr1:631862/+	MTCO1P12	utr5
chr1:633192G=/+	chr1	633192	G	G	+	chr1:633192/+	MTCO2P12	utr5
chr1:634337T>C/+	chr1	634337	T	C	+	chr1:634337/+	MTATP6P1	utr5
chr1:826352C>T/-	chr1	826352	C	T	-	chr1:826352/-	LINC00115	utr5
chr1:853876T>C/+	chr1	853876	T	C	+	chr1:853876/+	LINC01128	utr5
chr1:944296G>A/-	chr1	944296	G	A	-	chr1:944296/-	NOC2L	utr3

# Select autocorrlated genetic variants 
obj <- RunAutoCorr(obj)

In the alternative splicing analysis, we use mode 1 (the default mode). However, for allele-specific gene expression, we switch to mode 2 to compare the expression of an SNV with other SNVs at the same location. Since no binding assay is specified, the records at the same locus are first aggregated, and then the test feature is subtracted from the aggregated data (mode 2).

obj <- RunSDT(object = obj, bind.name = "locus", mode = 2)

Working on assay var.

Use predefined weight matrix "pca_wm".

Processing 12923 features.

No bind.assay specified, update min.features.per.block to 2.

Processing 4016 binding features.

Use "counts" layer for test features.

Aggregate counts for binding features.

Using 7 threads.

Runtime : 19.56211 secs.

sel.chrs <- c(1:21, "X")
FbtPlot(obj, val = "locus.padj", remove.chr = TRUE, sel.chrs = sel.chrs)

# Let's random pick a pair of features at the same locus
obj[['var']][[]] %>% filter(locus.padj < 1e-10) %>% knitr::kable()

	chr	start	ref	alt	strand	locus	gene_name	type	moransi	autocorr.variable	locus.D	locus.t
chr1:87328836C>G/+	chr1	87328836	C	G	+	chr1:87328836/+	LMO4	upstream	0.3119438	TRUE	0.4014965	9.116663
chr10:3136689C>T/+	chr10	3136689	C	T	+	chr10:3136689/+	PFKP	utr5	0.1016939	TRUE	0.3286975	9.346388
chr10:12250345A>G/+	chr10	12250345	A	G	+	chr10:12250345/+	CDC123	utr5	0.0443176	TRUE	0.3683718	9.684959
chr10:17237326C>T/+	chr10	17237326	C	T	+	chr10:17237326/+	VIM	utr5	0.6896495	TRUE	1.5378650	30.294583
chr10:17237326C=/+	chr10	17237326	C	C	+	chr10:17237326/+	VIM	utr5	0.5869554	TRUE	1.0154665	30.549615
chr10:62069780C>T/+	chr10	62069780	C	T	+	chr10:62069780/+	ARID5B	exon	0.1124084	TRUE	0.4595768	11.547842
chr10:62069780C=/+	chr10	62069780	C	C	+	chr10:62069780/+	ARID5B	exon	0.1011137	TRUE	0.4199300	9.887218
chr10:71816550C=/-	chr10	71816550	C	C	-	chr10:71816550/-	PSAP	utr3	0.2927699	TRUE	0.5403844	15.283752
chr2:74969604A>G/+	chr2	74969604	A	G	+	chr2:74969604/+	POLE4	utr5	0.1265709	TRUE	0.5748608	19.894070
chr6:30009771C>G/+	chr6	30009771	C	G	+	chr6:30009771/+	ENSG00000290574	utr5	0.0613360	TRUE	0.3620673	8.586900
chr6:31268945C>T/-	chr6	31268945	C	T	-	chr6:31268945/-	HLA-C	utr3	0.2901833	TRUE	0.5884890	8.919961
chrM:8860A>G/+	chrM	8860	A	G	+	chrM:8860/+	MT-ATP6	exon	0.1594867	TRUE	0.4702824	10.359465

FeaturePlot(obj, features = c("chr10:17237326C>T/+", "chr10:17237326C=/+", "VIM" ), order=TRUE, pt.size = 1, ncol=3)

Warning: The `slot` argument of `FetchData()` is deprecated as of SeuratObject 5.0.0.
ℹ Please use the `layer` argument instead.
ℹ The deprecated feature was likely used in the Seurat package.
  Please report the issue at <https://github.com/satijalab/seurat/issues>.

Warning: Could not find VIM in the default search locations, found in 'RNA'
assay instead

# We could also zoom in specific region
FbtPlot(obj, val = "locus.padj", chr="chr10", start = 17225000, end = 17240000, gtf = gtf)

Zoom in chr10 : 17224000-17241000

It appears that the T allele of the gene VIM is expressed in most cell groups, while the C allele seems to be primarily expressed in groups 1 and 4.

Using our spatial dissimilarity analysis method, we have identified many cases of allele-specific gene expression. This phenomenon can be influenced by several known and unknown mechanisms, including gene imprinting, genetic recombination, and somatic variants during cell or disease development. While we won’t explore these mechanisms in detail here, you can refer to our manuscript, which includes several case studies dedicated to this topic.

# Save the Seurat object for further use
saveRDS(obj, file = "glioblastoma5k.rds")

Command(obj)

 [1] "NormalizeData.RNA"        "FindVariableFeatures.RNA"
 [3] "ScaleData.RNA"            "RunPCA.RNA"              
 [5] "FindNeighbors.RNA.pca"    "FindClusters"            
 [7] "RunUMAP.RNA.pca"          "NormalizeData.var"       
 [9] "ParseVarName.var"         "annoVAR.var"             
[11] "RunAutoCorr.var.pca"      "SetAutoCorrFeatures.var" 
[13] "RunSDT.var"

sessionInfo()

R version 4.5.0 (2025-04-11)
Platform: aarch64-apple-darwin24.4.0
Running under: macOS Sequoia 15.5

Matrix products: default
BLAS:   /opt/homebrew/Cellar/openblas/0.3.29/lib/libopenblasp-r0.3.29.dylib 
LAPACK: /opt/homebrew/Cellar/r/4.5.0/lib/R/lib/libRlapack.dylib;  LAPACK version 3.12.1

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

time zone: Asia/Shanghai
tzcode source: internal

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] future_1.49.0      dplyr_1.1.4        Seurat_5.3.0       SeuratObject_5.1.0
[5] sp_2.2-0           ggplot2_3.5.2      Yano_1.2          

loaded via a namespace (and not attached):
  [1] deldir_2.0-4           pbapply_1.7-2          gridExtra_2.3         
  [4] rlang_1.1.6            magrittr_2.0.3         RcppAnnoy_0.0.22      
  [7] spatstat.geom_3.3-6    matrixStats_1.5.0      ggridges_0.5.6        
 [10] compiler_4.5.0         png_0.1-8              vctrs_0.6.5           
 [13] reshape2_1.4.4         stringr_1.5.1          pkgconfig_2.0.3       
 [16] fastmap_1.2.0          labeling_0.4.3         promises_1.3.2        
 [19] rmarkdown_2.29         purrr_1.0.4            xfun_0.52             
 [22] jsonlite_2.0.0         goftest_1.2-3          later_1.4.2           
 [25] spatstat.utils_3.1-4   irlba_2.3.5.1          parallel_4.5.0        
 [28] cluster_2.1.8.1        R6_2.6.1               ica_1.0-3             
 [31] stringi_1.8.7          RColorBrewer_1.1-3     spatstat.data_3.1-6   
 [34] reticulate_1.42.0      spatstat.univar_3.1-3  parallelly_1.44.0     
 [37] lmtest_0.9-40          scattermore_1.2        Rcpp_1.0.14           
 [40] knitr_1.50             tensor_1.5             future.apply_1.11.3   
 [43] zoo_1.8-14             R.utils_2.13.0         sctransform_0.4.2     
 [46] httpuv_1.6.16          Matrix_1.7-3           splines_4.5.0         
 [49] igraph_2.1.4           tidyselect_1.2.1       viridis_0.6.5         
 [52] abind_1.4-8            yaml_2.3.10            spatstat.random_3.3-3 
 [55] codetools_0.2-20       miniUI_0.1.2           spatstat.explore_3.4-2
 [58] listenv_0.9.1          lattice_0.22-6         tibble_3.2.1          
 [61] plyr_1.8.9             withr_3.0.2            shiny_1.10.0          
 [64] ROCR_1.0-11            evaluate_1.0.3         Rtsne_0.17            
 [67] fastDummies_1.7.5      survival_3.8-3         polyclip_1.10-7       
 [70] fitdistrplus_1.2-2     pillar_1.10.2          KernSmooth_2.23-26    
 [73] plotly_4.10.4          generics_0.1.4         RcppHNSW_0.6.0        
 [76] scales_1.4.0           gtools_3.9.5           globals_0.18.0        
 [79] xtable_1.8-4           glue_1.8.0             lazyeval_0.2.2        
 [82] tools_4.5.0            data.table_1.17.2      RSpectra_0.16-2       
 [85] RANN_2.6.2             dotCall64_1.2          cowplot_1.1.3         
 [88] grid_4.5.0             tidyr_1.3.1            nlme_3.1-168          
 [91] patchwork_1.3.0        cli_3.6.5              spatstat.sparse_3.1-0 
 [94] spam_2.11-1            viridisLite_0.4.2      uwot_0.2.3            
 [97] gtable_0.3.6           R.methodsS3_1.8.2      digest_0.6.37         
[100] progressr_0.15.1       ggrepel_0.9.6          htmlwidgets_1.6.4     
[103] farver_2.1.2           htmltools_0.5.8.1      R.oo_1.27.1           
[106] lifecycle_1.0.4        httr_1.4.7             mime_0.13             
[109] MASS_7.3-65