Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 25 additions & 19 deletions R/doubletFinder.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
#' @param sct Logical representing whether SCTransform was used during original
#' Seurat object pre-processing (default = FALSE).
#' @param annotations annotations.
#' @param verbose Set to \code{FALSE} to hide print statements. Also passed to
#' Seurat functions.
#' @return Seurat object with updated metadata including pANN and doublet
#' classifications.
#' @author Chris McGinnis
Expand Down Expand Up @@ -63,7 +65,8 @@ doubletFinder <- function(seu,
nExp,
reuse.pANN = NULL,
sct = FALSE,
annotations = NULL) {
annotations = NULL,
verbose = TRUE) {

## Generate new list of doublet classificatons from existing pANN vector to save time
if (!is.null(reuse.pANN)) {
Expand All @@ -83,7 +86,7 @@ doubletFinder <- function(seu,
data <- counts[, real.cells]
n_real.cells <- length(real.cells)
n_doublets <- round(n_real.cells/(1 - pN) - n_real.cells)
print(paste("Creating",n_doublets,"artificial doublets...",sep=" "))
if(verbose) print(paste("Creating",n_doublets,"artificial doublets...",sep=" "))
real.cells1 <- sample(real.cells, n_doublets, replace = TRUE)
real.cells2 <- sample(real.cells, n_doublets, replace = TRUE)
doublets <- (data[, real.cells1] + data[, real.cells2])/2
Expand All @@ -104,16 +107,17 @@ doubletFinder <- function(seu,

## Pre-process Seurat object
if (!sct) {
print("Creating Seurat object...")
if(verbose) print("Creating Seurat object...")
seu_wdoublets <- CreateSeuratObject(counts = data_wdoublets)

print("Normalizing Seurat object...")
if(verbose) print("Normalizing Seurat object...")
seu_wdoublets <- NormalizeData(seu_wdoublets,
normalization.method = orig.commands$NormalizeData.RNA@params$normalization.method,
scale.factor = orig.commands$NormalizeData.RNA@params$scale.factor,
margin = orig.commands$NormalizeData.RNA@params$margin)
margin = orig.commands$NormalizeData.RNA@params$margin,
verbose = verbose)

print("Finding variable genes...")
if(verbose) print("Finding variable genes...")
seu_wdoublets <- FindVariableFeatures(seu_wdoublets,
selection.method = orig.commands$FindVariableFeatures.RNA$selection.method,
loess.span = orig.commands$FindVariableFeatures.RNA$loess.span,
Expand All @@ -124,50 +128,52 @@ doubletFinder <- function(seu,
binning.method = orig.commands$FindVariableFeatures.RNA$binning.method,
nfeatures = orig.commands$FindVariableFeatures.RNA$nfeatures,
mean.cutoff = orig.commands$FindVariableFeatures.RNA$mean.cutoff,
dispersion.cutoff = orig.commands$FindVariableFeatures.RNA$dispersion.cutoff)
dispersion.cutoff = orig.commands$FindVariableFeatures.RNA$dispersion.cutoff,
verbose = verbose)

print("Scaling data...")
if(verbose) print("Scaling data...")
seu_wdoublets <- ScaleData(seu_wdoublets,
features = orig.commands$ScaleData.RNA$features,
model.use = orig.commands$ScaleData.RNA$model.use,
do.scale = orig.commands$ScaleData.RNA$do.scale,
do.center = orig.commands$ScaleData.RNA$do.center,
scale.max = orig.commands$ScaleData.RNA$scale.max,
block.size = orig.commands$ScaleData.RNA$block.size,
min.cells.to.block = orig.commands$ScaleData.RNA$min.cells.to.block)
min.cells.to.block = orig.commands$ScaleData.RNA$min.cells.to.block,
verbose = verbose)

print("Running PCA...")
if(verbose) print("Running PCA...")
seu_wdoublets <- RunPCA(seu_wdoublets,
features = orig.commands$ScaleData.RNA$features,
npcs = length(PCs),
rev.pca = orig.commands$RunPCA.RNA$rev.pca,
weight.by.var = orig.commands$RunPCA.RNA$weight.by.var,
verbose=FALSE)
verbose = verbose)
pca.coord <- seu_wdoublets@[email protected][ , PCs]
cell.names <- rownames([email protected])
nCells <- length(cell.names)
rm(seu_wdoublets); gc() # Free up memory
} else {
print("Creating Seurat object...")
if(verbose) print("Creating Seurat object...")
seu_wdoublets <- CreateSeuratObject(counts = data_wdoublets)

print("Running SCTransform...")
seu_wdoublets <- SCTransform(seu_wdoublets)
if(verbose) print("Running SCTransform...")
seu_wdoublets <- SCTransform(seu_wdoublets, verbose = verbose)

print("Running PCA...")
seu_wdoublets <- RunPCA(seu_wdoublets, npcs = length(PCs))
if(verbose) print("Running PCA...")
seu_wdoublets <- RunPCA(seu_wdoublets, npcs = length(PCs), verbose = verbose)
pca.coord <- seu_wdoublets@[email protected][ , PCs]
cell.names <- rownames([email protected])
nCells <- length(cell.names)
rm(seu_wdoublets); gc()
}

## Compute PC distance matrix
print("Calculating PC distance matrix...")
if(verbose) print("Calculating PC distance matrix...")
dist.mat <- fields::rdist(pca.coord)

## Compute pANN
print("Computing pANN...")
if(verbose) print("Computing pANN...")
pANN <- as.data.frame(matrix(0L, nrow = n_real.cells, ncol = 1))
if(!is.null(annotations)){
neighbor_types <- as.data.frame(matrix(0L, nrow = n_real.cells, ncol = length(levels(doublet_types1))))
Expand All @@ -193,7 +199,7 @@ doubletFinder <- function(seu,
}
}
}
print("Classifying doublets..")
if(verbose) print("Classifying doublets..")
classifications <- rep("Singlet",n_real.cells)
classifications[order(pANN$pANN[1:n_real.cells], decreasing=TRUE)[1:nExp]] <- "Doublet"
[email protected][, paste("pANN",pN,pK,nExp,sep="_")] <- pANN[rownames([email protected]), 1]
Expand Down
45 changes: 25 additions & 20 deletions R/parallel_paramSweep.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
#' paramSweep_v3 arguments.
#' @param sct Logical representing whether Seurat object was pre-processed
#' using 'sctransform'. Set according to paramSweep_v3 arguments (default = F).
#' @param verbose Set to \code{FALSE} to hide print statements. Also passed to
#' Seurat functions.
#' @return Parallelization function compatible with mclapply.
#' @importFrom fields rdist
#' @importFrom Seurat SCTransform
Expand All @@ -31,13 +33,13 @@
parallel_paramSweep <- function(n, n.real.cells,
real.cells, pK,
pN, data, orig.commands,
PCs, sct) {
PCs, sct, verbose = TRUE) {

sweep.res.list = list()
list.ind = 0

## Make merged real-artifical data
print(paste("Creating artificial doublets for pN = ", pN[n]*100,"%",sep=""))
if(verbose) print(paste("Creating artificial doublets for pN = ", pN[n]*100,"%",sep=""))
n_doublets <- round(n.real.cells/(1 - pN[n]) - n.real.cells)
real.cells1 <- sample(real.cells, n_doublets, replace = TRUE)
real.cells2 <- sample(real.cells, n_doublets, replace = TRUE)
Expand All @@ -47,16 +49,17 @@ parallel_paramSweep <- function(n, n.real.cells,

## Pre-process Seurat object
if (!sct) {
print("Creating Seurat object...")
if(verbose) print("Creating Seurat object...")
seu_wdoublets <- CreateSeuratObject(counts = data_wdoublets)

print("Normalizing Seurat object...")
if(verbose) print("Normalizing Seurat object...")
seu_wdoublets <- NormalizeData(seu_wdoublets,
normalization.method = orig.commands$NormalizeData.RNA@params$normalization.method,
scale.factor = orig.commands$NormalizeData.RNA@params$scale.factor,
margin = orig.commands$NormalizeData.RNA@params$margin)
margin = orig.commands$NormalizeData.RNA@params$margin,
verbose = verbose)

print("Finding variable genes...")
if(verbose) print("Finding variable genes...")
seu_wdoublets <- FindVariableFeatures(seu_wdoublets,
selection.method = orig.commands$FindVariableFeatures.RNA$selection.method,
loess.span = orig.commands$FindVariableFeatures.RNA$loess.span,
Expand All @@ -67,46 +70,48 @@ parallel_paramSweep <- function(n, n.real.cells,
binning.method = orig.commands$FindVariableFeatures.RNA$binning.method,
nfeatures = orig.commands$FindVariableFeatures.RNA$nfeatures,
mean.cutoff = orig.commands$FindVariableFeatures.RNA$mean.cutoff,
dispersion.cutoff = orig.commands$FindVariableFeatures.RNA$dispersion.cutoff)
dispersion.cutoff = orig.commands$FindVariableFeatures.RNA$dispersion.cutoff,
verbose = verbose)

print("Scaling data...")
if(verbose) print("Scaling data...")
seu_wdoublets <- ScaleData(seu_wdoublets,
features = orig.commands$ScaleData.RNA$features,
model.use = orig.commands$ScaleData.RNA$model.use,
do.scale = orig.commands$ScaleData.RNA$do.scale,
do.center = orig.commands$ScaleData.RNA$do.center,
scale.max = orig.commands$ScaleData.RNA$scale.max,
block.size = orig.commands$ScaleData.RNA$block.size,
min.cells.to.block = orig.commands$ScaleData.RNA$min.cells.to.block)
min.cells.to.block = orig.commands$ScaleData.RNA$min.cells.to.block,
verbose = verbose)

print("Running PCA...")
if(verbose) print("Running PCA...")
seu_wdoublets <- RunPCA(seu_wdoublets,
features = orig.commands$ScaleData.RNA$features,
npcs = length(PCs),
rev.pca = orig.commands$RunPCA.RNA$rev.pca,
weight.by.var = orig.commands$RunPCA.RNA$weight.by.var,
verbose=FALSE)
verbose=verbose)
} else {
print("Creating Seurat object...")
if(verbose) print("Creating Seurat object...")
seu_wdoublets <- CreateSeuratObject(counts = data_wdoublets)

print("Running SCTransform...")
seu_wdoublets <- SCTransform(seu_wdoublets)
if(verbose) print("Running SCTransform...")
seu_wdoublets <- SCTransform(seu_wdoublets, verbose = verbose)

print("Running PCA...")
seu_wdoublets <- RunPCA(seu_wdoublets, npcs = length(PCs))
if(verbose) print("Running PCA...")
seu_wdoublets <- RunPCA(seu_wdoublets, npcs = length(PCs), verbose = verbose)
}

## Compute PC distance matrix
print("Calculating PC distance matrix...")
if(verbose) print("Calculating PC distance matrix...")
nCells <- nrow([email protected])
pca.coord <- seu_wdoublets@[email protected][ , PCs]
rm(seu_wdoublets)
gc()
dist.mat <- fields::rdist(pca.coord)[,1:n.real.cells]

## Pre-order PC distance matrix prior to iterating across pK for pANN computations
print("Defining neighborhoods...")
if(verbose) print("Defining neighborhoods...")
for (i in 1:n.real.cells) {
dist.mat[,i] <- order(dist.mat[,i])
}
Expand All @@ -116,9 +121,9 @@ parallel_paramSweep <- function(n, n.real.cells,
dist.mat <- dist.mat[1:ind, ]

## Compute pANN across pK sweep
print("Computing pANN across all pK...")
if(verbose) print("Computing pANN across all pK...")
for (k in 1:length(pK)) {
print(paste("pK = ", pK[k], "...", sep = ""))
if(verbose) print(paste("pK = ", pK[k], "...", sep = ""))
pk.temp <- round(nCells * pK[k])
pANN <- as.data.frame(matrix(0L, nrow = n.real.cells, ncol = 1))
colnames(pANN) <- "pANN"
Expand Down
11 changes: 7 additions & 4 deletions R/paramSweep.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
#' @param sct Logical representing whether SCTransform was used during original
#' Seurat object pre-processing (default = FALSE).
#' @param num.cores Number of cores to use for parallelization, default=1.
#' @param verbose Set to \code{FALSE} to hide print statements. Also passed to
#' Seurat functions.
#' @return List of pANN vectors for every pN and pK combination. Output also
#' contains pANN information for artificial doublets.
#' @author Chris McGinnis
Expand All @@ -22,11 +24,11 @@
#' @examples
#' data(pbmc_small)
#' seu <- pbmc_small
#' sweep.list <- paramSweep(seu, PCs = 1:10, sct=FALSE)
#' sweep.list <- paramSweep(seu, PCs = 1:10, sct = FALSE)
#' sweep.stats <- summarizeSweep(sweep.list, GT = FALSE)
#' bcmvn <- find.pK(sweep.stats)
#'
paramSweep <- function(seu, PCs=1:10, sct = FALSE, num.cores=1) {
paramSweep <- function(seu, PCs=1:10, sct = FALSE, num.cores=1, verbose = TRUE) {
## Set pN-pK param sweep ranges
pK <- c(0.0005, 0.001, 0.005, seq(0.01,0.3,by=0.01))
pN <- seq(0.05,0.3,by=0.05)
Expand Down Expand Up @@ -74,7 +76,7 @@ paramSweep <- function(seu, PCs=1:10, sct = FALSE, num.cores=1) {
data,
orig.commands,
PCs,
sct,mc.cores=num.cores)
sct,mc.cores=num.cores, verbose = verbose)
stopCluster(cl)
}else{
output2 <- lapply(as.list(1:length(pN)),
Expand All @@ -86,7 +88,8 @@ paramSweep <- function(seu, PCs=1:10, sct = FALSE, num.cores=1) {
data,
orig.commands,
PCs,
sct)
sct,
verbose = verbose)
}

## Write parallelized output into list
Expand Down
14 changes: 9 additions & 5 deletions man/doubletFinder.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 5 additions & 1 deletion man/parallel_paramSweep.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 5 additions & 2 deletions man/paramSweep.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.