Skip to content
Snippets Groups Projects
run_limma.R 2.04 KiB
Newer Older
sauves's avatar
sauves committed
library(dplyr)
library(tidyr)
library(data.table)
library(edgeR)
# library(BiocParallel)
library(stringr)
COUNT_PROTOCOL = 'strand-specific'
GE.counts.tpm.prt_cod = fread(paste('~/leucegene/data/star/COUNTS/GE_', COUNT_PROTOCOL, '_STAR_COUNTS_log10[TPMx1024_plus_one]_T.txt', sep = '',collapse = '')) 
duplicates = (GE.counts.tpm.prt_cod %>% select(gene_name) %>% group_by(gene_name) %>% summarise(n = n())%>% filter(n >= 2))$gene_name
GE.counts.tpm.prt_cod = GE.counts.tpm.prt_cod %>% filter(!(gene_name %in% duplicates))
rownames(GE.counts.tpm.prt_cod) = GE.counts.tpm.prt_cod$gene_name
limma.counts = GE.counts.tpm.prt_cod[,2:ncol(GE.counts.tpm.prt_cod)] 
not_nas = which(!(is.na(rowSums(limma.counts))))
limma.counts = limma.counts[not_nas,]
# read in conversion file 
conversion = fread('~/leucegene/E19/tables/CONVERSIONS/gene_id_conversions.txt')
names(conversion)[c(1,2,9)] = c('tpt_type', 'gene_name', 'ensbl_stable_ID') 

patients = fread('~/leucegene/E19/tables/PATIENTS/LEUCEGENE_452_LABELS.txt')

# this is the coldata file 
coldata = data.frame( condition = as.factor(t(patients[18,2:ncol(patients)]))) %>% mutate(condition = str_replace(condition, '-', '_'))
rownames(coldata) = names(patients)[2:ncol(patients)]
groups = coldata$condition

# figdir = '~/leucegene/E19/figures/JUN6'
# system(paste('mkdir', figdir))
# pdf(paste(figdir, paste('limma_fig1_',COUNT_PROTOCOL,'.pdf', sep = '', collapse = ''), sep = '/'))
# plotMDS(limma.counts, col = as.numeric(groups))
# dev.off()

mm = model.matrix(~0 + groups)
y = voom(limma.counts, mm, plot = T)
fit = lmFit(y, mm)
contr <- makeContrasts(groupsnon_stranded-groupsstranded, levels = colnames(coef(fit)))
tmp <- contrasts.fit(fit, contr)
tmp <- eBayes(tmp)
top.table <- topTable(tmp, sort.by = "logFC", n = Inf)
tabledir = '/u/sauves/leucegene/E19/tables/LIMMA-VOOM'
tablename = paste('LIMMA_VOOM_DEG_PROTEIN_CODING_',nrow(top.table),'x', ncol(top.table),'_PRTCL_',COUNT_PROTOCOL,'_stranded_vs_n_stranded.txt', collapse = '', sep = '')
write.table(top.table, paste(tabledir, tablename, sep = '/'))
# head(top.table, 20)