2 RNAseq新版简化流程
此版本流程为升级版简化一键流程,基于面向对象对一些分析步骤进行了封装。适合从头从counts数据进行的整套分析流程,包括差异分析,富集分析,GSEA分析,画图等。
rm(list = ls());gc()
library(LZ)
library(YZ)
library(DESeq2)
library(parallel)
library(BiocParallel)
library(edgeR)
library(limma)
library(tibble)
library(dplyr)
library(ggplot2)
library(clusterProfiler)
# 0. 参数设置 -------------
# 需要设置的部分
n = 6
resultdir <- "./05.diff.rna"
mark <- "RNA"
pval = 0.05
fdr = 0.1
logfc = log2(1.5)
# 无需修改部分,按默认即可
register(SnowParam(n))
resultdir.sub <- paste0(dirclean(resultdir), "/", mark)
# 1. 读入数据 -------------
diff.subtype <- DEG_prepareData(eset_file="./data/RNA.csv",
eset.islog = F,
id_dot = F, col.by = "gene_name",
col.del=NULL, auto.del.character=T,
annot_trans=F, f_mark="rna",
oop = T,
group_file = "./data/RNA_group.csv")
# 2. 差异分析 -------------
diff.subtype.r <- diff.subtype$runDEG(f_mark = mark,
outdir = resultdir,
pval = pval, fdr = fdr, logfc = logfc)
### diff.e <- DEeset$runDEG(f_mark = "pq0.01fc1e", outdir = "./01.diff/",
### pval = 0.01, fdr = 0.01, logfc = 1, method = "edger")
### diff.v <- DEeset$runDEG(f_mark = "pq0.01fc1v", outdir = "./01.diff/",
### pval = 0.01, fdr = 0.01, logfc = 1, method = "voom")
# help("DEeset")
# help("DEres")
# 2.1 画差异基因分布火山图
pic.valcano <- diff.subtype.r$pic.valcano(outdir = resultdir.sub,
filterc = "pvalue",
pvalue = pval,
fdr = fdr,
logfc.p = log2(1.5),
logfc.n = -log2(1.5))
### pic.valcano.e <- diff.e$pic.valcano(outdir = "./01.diff/pq0.01fc1e")
### pic.valcano.v <- diff.v$pic.valcano(outdir = "./01.diff/pq0.01fc1v")
### deg.in <- Reduce(intersect, list(diff$glist.deg, diff.e$glist.deg, diff.v$glist.deg))
### grep("PHLDB2", deg.in)
grep("EGFR", diff.subtype.r$glist.deg)
grep("EGFR", rownames(diff.subtype$eset2))
diff.subtype.r$resdf %>% dplyr::filter(Gene == "EGFR")
# 3. 富集分析 -------------------
# 3.1 默认阈值下富集分析
enrich <- diff.subtype.r$runENRICH(outdir = paste0(resultdir.sub, "/rich"))
# 3.2 不同阈值下富集分析
# 3.2.1 阈值1
diff.subtype.r$glist.deg.multi %>% names()
diff.subtype.r$fc.list
diff.subtype.r$update.glist.deg.multi(
p = 0.05,
fdr = 0.05,
fc.list = list('1.5' = log2(1.5))
)
diff.subtype.r$glist.deg.multi %>% names()
enrich2 <- diff.subtype.r$runENRICH(
genelist.filter = 1,
outdir = paste0(resultdir.sub, "/rich")
)
# 3.2.2 阈值2
diff.subtype.r$glist.deg.multi %>% names()
diff.subtype.r$fc.list
diff.subtype.r$update.glist.deg.multi(
p = 0.01,
fdr = 0.01,
fc.list = list('1.6' = log2(1.6))
)
diff.subtype.r$glist.deg.multi %>% names()
enrich2 <- diff.subtype.r$runENRICH(
genelist.filter = 1,
outdir = paste0(resultdir.sub, "/rich")
)
# 3.2.3 阈值3 (批量分析)
diff.subtype.r$update.glist.deg.multi()
enrich3 <- diff$runENRICH(genelist.filter=c(4,8),
outdir = paste0(resultdir.sub, "/rich")
)
# 3.3 画图
keggdf <- enrich$KEGGDF
pdata <- keggdf$p0.05q0.05FC1.5$all # 要酌情修改p0.05q0.05FC1.5
DEGp_Dotplot2(df=pdata,
resultdir = paste0(resultdir.sub, "/rich/***"), # 要酌情修改***
filemark = "heihei", pcompress = 1)
# 4. GSEA分析
# 4.1 GSEA可视化分析
diff.subtype.r$glist.gsea %>% runAPP_GSEA()
# 4.1 GSEA批量化分析
# On they way ...
save.image(paste0(resultdir.sub, "/1.diff.image.Rdata"))