library(dplyr) library(tidyr) library(data.table) library(edgeR) # library(BiocParallel) library(stringr) COUNT_PROTOCOL = 'strand-specific' GE.counts.tpm.prt_cod = fread(paste('~/leucegene/data/star/COUNTS/GE_', COUNT_PROTOCOL, '_STAR_COUNTS_log10[TPMx1024_plus_one]_T.txt', sep = '',collapse = '')) duplicates = (GE.counts.tpm.prt_cod %>% select(gene_name) %>% group_by(gene_name) %>% summarise(n = n())%>% filter(n >= 2))$gene_name GE.counts.tpm.prt_cod = GE.counts.tpm.prt_cod %>% filter(!(gene_name %in% duplicates)) rownames(GE.counts.tpm.prt_cod) = GE.counts.tpm.prt_cod$gene_name limma.counts = GE.counts.tpm.prt_cod[,2:ncol(GE.counts.tpm.prt_cod)] not_nas = which(!(is.na(rowSums(limma.counts)))) limma.counts = limma.counts[not_nas,] # read in conversion file conversion = fread('~/leucegene/E19/tables/CONVERSIONS/gene_id_conversions.txt') names(conversion)[c(1,2,9)] = c('tpt_type', 'gene_name', 'ensbl_stable_ID') patients = fread('~/leucegene/E19/tables/PATIENTS/LEUCEGENE_452_LABELS.txt') # this is the coldata file coldata = data.frame( condition = as.factor(t(patients[18,2:ncol(patients)]))) %>% mutate(condition = str_replace(condition, '-', '_')) rownames(coldata) = names(patients)[2:ncol(patients)] groups = coldata$condition # figdir = '~/leucegene/E19/figures/JUN6' # system(paste('mkdir', figdir)) # pdf(paste(figdir, paste('limma_fig1_',COUNT_PROTOCOL,'.pdf', sep = '', collapse = ''), sep = '/')) # plotMDS(limma.counts, col = as.numeric(groups)) # dev.off() mm = model.matrix(~0 + groups) y = voom(limma.counts, mm, plot = T) fit = lmFit(y, mm) contr <- makeContrasts(groupsnon_stranded-groupsstranded, levels = colnames(coef(fit))) tmp <- contrasts.fit(fit, contr) tmp <- eBayes(tmp) top.table <- topTable(tmp, sort.by = "logFC", n = Inf) tabledir = '/u/sauves/leucegene/E19/tables/LIMMA-VOOM' tablename = paste('LIMMA_VOOM_DEG_PROTEIN_CODING_',nrow(top.table),'x', ncol(top.table),'_PRTCL_',COUNT_PROTOCOL,'_stranded_vs_n_stranded.txt', collapse = '', sep = '') write.table(top.table, paste(tabledir, tablename, sep = '/')) # head(top.table, 20)