2 RNAseq新版简化流程

此版本流程为升级版简化一键流程,基于面向对象对一些分析步骤进行了封装。适合从头从counts数据进行的整套分析流程,包括差异分析,富集分析,GSEA分析,画图等。

rm(list = ls());gc()
library(LZ)
library(YZ)
library(DESeq2)
library(parallel)
library(BiocParallel)
library(edgeR)
library(limma)
library(tibble)
library(dplyr)
library(ggplot2)
library(clusterProfiler)

# 0. 参数设置 -------------
# 需要设置的部分
n = 6
resultdir <- "./05.diff.rna"
mark <- "RNA"
pval = 0.05
fdr = 0.1
logfc = log2(1.5)
# 无需修改部分,按默认即可
register(SnowParam(n))
resultdir.sub <- paste0(dirclean(resultdir), "/", mark)

# 1. 读入数据 -------------
diff.subtype <- DEG_prepareData(eset_file="./data/RNA.csv", 
                                eset.islog = F,
                                id_dot = F, col.by = "gene_name",
                                col.del=NULL, auto.del.character=T,
                                annot_trans=F, f_mark="rna",
                                oop = T,
                                group_file = "./data/RNA_group.csv")
# 2. 差异分析 -------------
diff.subtype.r <- diff.subtype$runDEG(f_mark = mark, 
                                      outdir = resultdir, 
                                      pval = pval, fdr = fdr, logfc = logfc)
### diff.e <- DEeset$runDEG(f_mark = "pq0.01fc1e", outdir = "./01.diff/", 
###                         pval = 0.01, fdr = 0.01, logfc = 1, method = "edger")
### diff.v <- DEeset$runDEG(f_mark = "pq0.01fc1v", outdir = "./01.diff/", 
###                         pval = 0.01, fdr = 0.01, logfc = 1, method = "voom")
# help("DEeset")
# help("DEres")
# 2.1 画差异基因分布火山图
pic.valcano <- diff.subtype.r$pic.valcano(outdir = resultdir.sub,
                                          filterc = "pvalue",
                                          pvalue = pval,
                                          fdr = fdr,
                                          logfc.p = log2(1.5),
                                          logfc.n = -log2(1.5))
### pic.valcano.e <- diff.e$pic.valcano(outdir = "./01.diff/pq0.01fc1e")
### pic.valcano.v <- diff.v$pic.valcano(outdir = "./01.diff/pq0.01fc1v")
### deg.in <- Reduce(intersect, list(diff$glist.deg, diff.e$glist.deg, diff.v$glist.deg))
### grep("PHLDB2", deg.in)

grep("EGFR", diff.subtype.r$glist.deg)
grep("EGFR", rownames(diff.subtype$eset2))
diff.subtype.r$resdf %>% dplyr::filter(Gene == "EGFR")

# 3. 富集分析  -------------------
# 3.1 默认阈值下富集分析
enrich <- diff.subtype.r$runENRICH(outdir = paste0(resultdir.sub, "/rich"))
# 3.2 不同阈值下富集分析  
# 3.2.1 阈值1
diff.subtype.r$glist.deg.multi %>% names()
diff.subtype.r$fc.list
diff.subtype.r$update.glist.deg.multi(
  p = 0.05,
  fdr = 0.05,
  fc.list = list('1.5' = log2(1.5))
  )
diff.subtype.r$glist.deg.multi %>% names()
enrich2 <- diff.subtype.r$runENRICH(
  genelist.filter = 1,
  outdir = paste0(resultdir.sub, "/rich")
  )
# 3.2.2 阈值2
diff.subtype.r$glist.deg.multi %>% names()
diff.subtype.r$fc.list
diff.subtype.r$update.glist.deg.multi(
  p = 0.01,
  fdr = 0.01,
  fc.list = list('1.6' = log2(1.6))
  )
diff.subtype.r$glist.deg.multi %>% names()
enrich2 <- diff.subtype.r$runENRICH(
  genelist.filter = 1,
  outdir = paste0(resultdir.sub, "/rich")
  )
# 3.2.3 阈值3 (批量分析)
diff.subtype.r$update.glist.deg.multi()
enrich3 <- diff$runENRICH(genelist.filter=c(4,8), 
                          outdir = paste0(resultdir.sub, "/rich")
                          )
# 3.3 画图  
keggdf <- enrich$KEGGDF 
pdata <- keggdf$p0.05q0.05FC1.5$all  # 要酌情修改p0.05q0.05FC1.5
DEGp_Dotplot2(df=pdata, 
              resultdir = paste0(resultdir.sub, "/rich/***"), # 要酌情修改***
              filemark = "heihei", pcompress = 1)

# 4. GSEA分析
# 4.1 GSEA可视化分析
diff.subtype.r$glist.gsea %>% runAPP_GSEA()
# 4.1 GSEA批量化分析
# On they way ...

save.image(paste0(resultdir.sub, "/1.diff.image.Rdata"))