Dear authors, I find the nCount_ATAC in the metadata will decrease for all the cells after running TSSEnrichment(ATAC, fast = FALSE) for a Seurat object with snATAC data from several samples (with separate fragment.tsv.gz). I checked the original code for TSSEnrichment but have no idea why this will happen.
I have tried on my own data and public data, the same problem emerges. The problem will only occur when fast set to FALSE and with number of samples >=2.
public data used:
- https://storage.googleapis.com/linnarsson-lab-human/ATAC_dev/10X/10X290_1_ABCD_1.tar.gz
- https://storage.googleapis.com/linnarsson-lab-human/ATAC_dev/10X/10X346_2_ABCD_2.tar.gz
# 1. QC on single sample =========================================
# get gene annotations for hg38
annotation <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86)
seqlevels(annotation) <- paste0('chr', seqlevels(annotation))
counts=Read10X_h5(paste0(Parent_Dir,df$filename[i],"filtered_feature_bc_matrix.h5"))
# create a Seurat object containing the RNA adata
S290_1 <- CreateSeuratObject(
counts = counts$`Gene Expression`,
assay = "RNA"
)
S290_1[["percent.mito"]] <- PercentageFeatureSet(S290_1, pattern = "^MT-")
S290_1[["percent.rb"]] <- PercentageFeatureSet(S290_1, pattern = "^RP[SL]")
genes <- rownames(S290_1)
remain <- genes[-grep(pattern = "^GM1|^GM2|^GM3|^GM4|^GM5|^GM6|^GM7|^GM8|^GM9|RIK", x = genes)]
S290_1 <- subset(S290_1, features = remain)
# create ATAC assay and add it to the object
S290_1[["ATAC"]] <- CreateChromatinAssay(
counts = counts$Peaks,
sep = c(":", "-"),
fragments = fragpath,
annotation = annotation
)
S290_1@meta.data$orig.ident <- paste0('S',df$Sample_ID[[i]])
S290_1 <- NucleosomeSignal(S290_1)
S290_1 <- TSSEnrichment(S290_1)
S290_1$blacklist_fraction <- FractionCountsInRegion(object = S290_1, assay = 'ATAC',regions = blacklist_hg38)
# filter out low quality cells
max_nFeature_RNA = quantile(S290_1$nFeature_RNA, 0.95)
S290_1 <- subset(
x = S290_1,
subset = nCount_ATAC < 100000 &
nFeature_RNA < max_nFeature_RNA &
nCount_ATAC > 500 &
nFeature_RNA > 500 &
nucleosome_signal < 2 &
TSS.enrichment > 2 &
#blacklist_fraction < 0.05 & #暂时没有进行这项质控
percent.mito < 5 & percent.rb < 10
)
meta_data <- S290_1@meta.data
meta_dir=paste0(Parent_Dir,"Meta/",S290_1@meta.data$orig.ident[1],"_meta_0603.csv")
# 导出为 CSV 文件
write.csv(meta_data, file = meta_dir, row.names = TRUE)
2. Combine two samples =======================================
peaks1=read.table(
file = paste0(Parent_Dir,'10X290_1_ABCD_1/atac_peaks.bed'),
col.names = c("chr", "start", "end")
)
peaks1 <- makeGRangesFromDataFrame(peaks1)
peaks2=read.table(
file = paste0(Parent_Dir,'10X346_2_ABCD_2/atac_peaks.bed'),
col.names = c("chr", "start", "end")
)
peaks2 <- makeGRangesFromDataFrame(peaks2)
chr<-c()
for (i in 1:22){
chr<-c(chr, paste0("chr",i))
}
chr<-c(chr,"chrX")
chr<-c(chr,"chrY")
peaks1<-peaks1[seqnames(peaks1) %in% chr]
peaks2<-peaks2[seqnames(peaks2) %in% chr]
peaks=list(peaks1,peaks2)
peaks_gl <- GRangesList(peaks) # list -> GRangesList
all_peaks <- unlist(peaks_gl, use.names=FALSE)
combined.peaks <- reduce(all_peaks)
# Filter out bad peaks based on length
peakwidths <- width(combined.peaks)
combined.peaks <- combined.peaks[peakwidths < 10000 & peakwidths > 20]
Meta1=read.table(
file = paste0(Parent_Dir,'Meta/S290_1_meta_0603.csv'),
stringsAsFactors = FALSE,
sep = ",",
header = TRUE,
row.names = 1
)
Meta2=read.table(
file = paste0(Parent_Dir,'Meta/S346_2_meta_0603.csv'),
stringsAsFactors = FALSE,
sep = ",",
header = TRUE,
row.names = 1
)
frags1= CreateFragmentObject(
path = paste0(Parent_Dir,"10X290_1_ABCD_1/atac_fragments.tsv.gz"),
cells = rownames(Meta1) #根据metadata里的细胞保留
)
frags2= CreateFragmentObject(
path = paste0(Parent_Dir,"10X346_2_ABCD_2/atac_fragments.tsv.gz"),
cells = rownames(Meta2) #根据metadata里的细胞保留
)
counts1=FeatureMatrix(
fragments = frags1,
features = combined.peaks,
cells = rownames(Meta1)
)
counts2=FeatureMatrix(
fragments = frags2,
features = combined.peaks,
cells = rownames(Meta2)
)
annotation <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86)
seqlevels(annotation) <- paste0('chr', seqlevels(annotation))
# seqlevelsStyle(annotation) <- 'UCSC'
genome(annotation) <- "hg38"
assay1=CreateChromatinAssay(counts1, fragments = frags1,annotation = annotation)
assay2=CreateChromatinAssay(counts2, fragments = frags2,annotation = annotation)
obj1=CreateSeuratObject(assay1, assay = "ATAC", meta.data=Meta1)
obj2=CreateSeuratObject(assay2, assay = "ATAC", meta.data=Meta2)
# merge all datasets, adding a cell ID to make sure cell names are unique
combined <- merge(
x = obj1,
y = obj2,
add.cell.ids = c('1','2')
)
# 3. check the nCount_ATAC ===========================
summary(combined@meta.data$nCount_ATAC)
Meta=combined@meta.data
table(Meta[Meta$nCount_ATAC<500,'orig.ident'])
# 4. run the TssEnrichment =================================
ATAC=combined
ATAC = TSSEnrichment(ATAC, fast = FALSE)
summary(ATAC@meta.data$nCount_ATAC)
Meta=ATAC@meta.data
table(Meta[Meta$nCount_ATAC<500,'orig.ident'])
table(ATAC@meta.data$nCount_ATAC<combined@meta.data$nCount_ATAC)
You will find the table(ATAC@meta.data$nCount_ATAC<combined@meta.data$nCount_ATAC) returns TRUE
sessionInfo():
R version 4.4.3 (2025-02-28)
Platform: x86_64-conda-linux-gnu
Running under: CentOS Linux 7 (Core)
Matrix products: default
BLAS/LAPACK: /public/home/yzhang/micromamba/envs/ATAC2/lib/libopenblasp-r0.3.29.so; LAPACK version 3.12.0
locale:
[1] LC_CTYPE=zh_CN.UTF-8 LC_NUMERIC=C
[3] LC_TIME=zh_CN.UTF-8 LC_COLLATE=zh_CN.UTF-8
[5] LC_MONETARY=zh_CN.UTF-8 LC_MESSAGES=zh_CN.UTF-8
[7] LC_PAPER=zh_CN.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=zh_CN.UTF-8 LC_IDENTIFICATION=C
time zone: Asia/Shanghai
tzcode source: system (glibc)
attached base packages:
[1] stats4 stats graphics grDevices utils datasets methods
[8] base
other attached packages:
[1] BSgenome.Hsapiens.UCSC.hg38_1.4.5 BSgenome_1.74.0
[3] rtracklayer_1.66.0 BiocIO_1.16.0
[5] Biostrings_2.74.0 XVector_0.46.0
[7] EnsDb.Hsapiens.v86_2.99.0 ensembldb_2.30.0
[9] AnnotationFilter_1.30.0 GenomicFeatures_1.58.0
[11] AnnotationDbi_1.68.0 Biobase_2.66.0
[13] GenomicRanges_1.58.0 GenomeInfoDb_1.42.0
[15] IRanges_2.40.0 S4Vectors_0.44.0
[17] BiocGenerics_0.52.0 Seurat_5.3.0
[19] SeuratObject_5.1.0 sp_2.2-0
[21] Signac_1.14.0
loaded via a namespace (and not attached):
[1] RcppAnnoy_0.0.22 splines_4.4.3
[3] later_1.4.2 pbdZMQ_0.3-14
[5] bitops_1.0-9 tibble_3.2.1
[7] polyclip_1.10-7 rpart_4.1.24
[9] XML_3.99-0.17 fastDummies_1.7.5
[11] lifecycle_1.0.4 globals_0.17.0
[13] lattice_0.22-7 MASS_7.3-64
[15] backports_1.5.0 magrittr_2.0.3
[17] rmarkdown_2.29 Hmisc_5.2-3
[19] plotly_4.10.4 yaml_2.3.10
[21] httpuv_1.6.15 sctransform_0.4.1
[23] spam_2.11-1 spatstat.sparse_3.1-0
[25] reticulate_1.42.0 cowplot_1.1.3
[27] pbapply_1.7-2 DBI_1.2.3
[29] RColorBrewer_1.1-3 abind_1.4-5
[31] zlibbioc_1.52.0 Rtsne_0.17
[33] purrr_1.0.4 biovizBase_1.54.0
[35] RCurl_1.98-1.16 nnet_7.3-20
[37] VariantAnnotation_1.52.0 GenomeInfoDbData_1.2.13
[39] ggrepel_0.9.6 irlba_2.3.5.1
[41] listenv_0.9.1 spatstat.utils_3.1-3
[43] goftest_1.2-3 RSpectra_0.16-2
[45] spatstat.random_3.3-3 fitdistrplus_1.2-2
[47] parallelly_1.43.0 DelayedArray_0.32.0
[49] codetools_0.2-20 RcppRoll_0.3.1
[51] tidyselect_1.2.1 UCSC.utils_1.2.0
[53] farver_2.1.2 matrixStats_1.5.0
[55] base64enc_0.1-3 spatstat.explore_3.4-2
[57] GenomicAlignments_1.42.0 jsonlite_2.0.0
[59] Formula_1.2-5 progressr_0.15.1
[61] ggridges_0.5.6 survival_3.8-3
[63] tools_4.4.3 ica_1.0-3
[65] Rcpp_1.0.14 glue_1.8.0
[67] SparseArray_1.6.0 gridExtra_2.3
[69] xfun_0.52 MatrixGenerics_1.18.0
[71] IRdisplay_1.1 dplyr_1.1.4
[73] withr_3.0.2 fastmap_1.2.0
[75] digest_0.6.37 R6_2.6.1
[77] mime_0.13 colorspace_2.1-1
[79] scattermore_1.2 tensor_1.5
[81] dichromat_2.0-0.1 spatstat.data_3.1-6
[83] RSQLite_2.3.9 tidyr_1.3.1
[85] generics_0.1.3 data.table_1.17.0
[87] S4Arrays_1.6.0 httr_1.4.7
[89] htmlwidgets_1.6.4 uwot_0.2.3
[91] pkgconfig_2.0.3 gtable_0.3.6
[93] blob_1.2.4 lmtest_0.9-40
[95] htmltools_0.5.8.1 dotCall64_1.2
[97] ProtGenerics_1.38.0 scales_1.4.0
[99] png_0.1-8 spatstat.univar_3.1-2
[101] rstudioapi_0.17.1 knitr_1.50
[103] reshape2_1.4.4 rjson_0.2.23
[105] uuid_1.2-1 checkmate_2.3.2
[107] nlme_3.1-168 curl_6.2.2
[109] repr_1.1.7 zoo_1.8-14
[111] cachem_1.1.0 stringr_1.5.1
[113] KernSmooth_2.23-26 parallel_4.4.3
[115] miniUI_0.1.2 foreign_0.8-90
[117] restfulr_0.0.15 pillar_1.10.2
[119] grid_4.4.3 vctrs_0.6.5
[121] RANN_2.6.2 promises_1.3.2
[123] xtable_1.8-4 cluster_2.1.8.1
[125] htmlTable_2.4.3 evaluate_1.0.3
[127] cli_3.6.5 compiler_4.4.3
[129] Rsamtools_2.22.0 rlang_1.1.6
[131] crayon_1.5.3 future.apply_1.11.3
[133] plyr_1.8.9 stringi_1.8.7
[135] viridisLite_0.4.2 deldir_2.0-4
[137] BiocParallel_1.40.0 lazyeval_0.2.2
[139] spatstat.geom_3.3-6 Matrix_1.7-3
[141] IRkernel_1.3.2 RcppHNSW_0.6.0
[143] patchwork_1.3.0 bit64_4.6.0-1
[145] future_1.40.0 ggplot2_3.5.2
[147] KEGGREST_1.46.0 shiny_1.10.0
[149] SummarizedExperiment_1.36.0 ROCR_1.0-11
[151] igraph_2.0.3 memoise_2.0.1
[153] fastmatch_1.1-6 bit_4.6.0
Dear authors, I find the nCount_ATAC in the metadata will decrease for all the cells after running
TSSEnrichment(ATAC, fast = FALSE)for a Seurat object with snATAC data from several samples (with separate fragment.tsv.gz). I checked the original code for TSSEnrichment but have no idea why this will happen.I have tried on my own data and public data, the same problem emerges. The problem will only occur when
fastset to FALSE and with number of samples >=2.public data used:
You will find the
table(ATAC@meta.data$nCount_ATAC<combined@meta.data$nCount_ATAC)returns TRUEsessionInfo():