1. 安装效果
> packageVersion("Seurat")
[1] ‘5.0.0’
> packageVersion("SeuratObject")
[1] ‘5.0.1’
>
> packageVersion("SeuratData")
[1] ‘0.2.2.9001’
> packageVersion("SeuratWrappers")
[1] ‘0.3.2’
安装方法:有人需要了再更新。//todo
- CentOS7.9系统,除了执行 yum,尽量使用无root方式安装
- 编译安装 gcc12.3
- 编译安装 py 3.7 和 Seurat V5
2. demo 代码
# pbmc 3k
# https://satijalab.org/seurat/
# https://satijalab.org/seurat/articles/get_started_v5_new
# https://satijalab.org/seurat/articles/pbmc3k_tutorial
library(dplyr)
library(Seurat)
library(patchwork)
# load data ----
pbmc.data=Read10X("~/data/test/filtered_gene_bc_matrices/hg19/")
pbmc <- CreateSeuratObject(counts = pbmc.data, project = "pbmc3k", min.cells = 3, min.features = 200)
pbmc
# QC ----
pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-")
VlnPlot(pbmc, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
plot1 <- FeatureScatter(pbmc, feature1 = "nCount_RNA", feature2 = "percent.mt")
plot2 <- FeatureScatter(pbmc, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
plot1 + plot2
pbmc <- subset(pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)
pbmc
# Normalizing ----
pbmc <- NormalizeData(pbmc, normalization.method = "LogNormalize", scale.factor = 10000)
# HVG ----
pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)
# Identify the 10 most highly variable genes
top10 <- head(VariableFeatures(pbmc), 10)
# plot variable features with and without labels
plot1 <- VariableFeaturePlot(pbmc)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE)
#plot1 + plot2
plot2
# Scale ----
all.genes <- rownames(pbmc)
pbmc <- ScaleData(pbmc, features = all.genes)
# PCA ----
pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc))
DimPlot(pbmc, reduction = "pca") + NoLegend()
DimHeatmap(pbmc, dims = 1, cells = 500, balanced = TRUE)
ElbowPlot(pbmc, ndims = 50)
# Cluster the cells ----
pbmc <- FindNeighbors(pbmc, dims = 1:10)
pbmc <- FindClusters(pbmc, resolution = 0.5)
# UMAP ----
pbmc <- RunUMAP(pbmc, dims = 1:10)
DimPlot(pbmc, reduction = "umap")
# saveRDS(pbmc, file = "~/data/test/pbmc_tutorial.Seurat.Rds")
# Markers ----
pbmc.markers <- FindAllMarkers(pbmc, only.pos = TRUE)
pbmc.markers %>%
group_by(cluster) %>%
dplyr::filter(avg_log2FC > 1)
VlnPlot(pbmc, features = c("MS4A1", "CD79A"))
FeaturePlot(pbmc, features = c("MS4A1", "GNLY", "CD3E", "CD14", "FCER1A", "FCGR3A", "LYZ", "PPBP",
"CD8A"))
pbmc.markers %>%
group_by(cluster) %>%
dplyr::filter(avg_log2FC > 1) %>%
slice_head(n = 5) %>%
ungroup() -> top10
DoHeatmap(pbmc, features = top10$gene) + NoLegend()
# Naming ----
new.cluster.ids <- c("Naive CD4 T", "CD14+ Mono", "Memory CD4 T", "B", "CD8 T", "FCGR3A+ Mono",
"NK", "DC", "Platelet")
names(new.cluster.ids) <- levels(pbmc)
pbmc <- RenameIdents(pbmc, new.cluster.ids)
DimPlot(pbmc, reduction = "umap", label = TRUE, pt.size = 0.5) + NoLegend()
# saveRDS(pbmc, file = "~/data/test/pbmc_final.Seurat.Rds")
## replot ----
library(ggplot2)
plot <- DimPlot(pbmc, reduction = "umap", label = TRUE, pt.size=0.5, label.size = 4) +
xlab("UMAP 1") + ylab("UMAP 2") +
theme(
axis.title = element_text(size = 14),
legend.text = element_text(size = 14)) +
guides(colour = guide_legend(override.aes = list(size = 4))); plot
#ggsave(filename = "../output/images/pbmc3k_umap.jpg", height = 7, width = 12, plot = plot, quality = 50)