Skip to content
Snippets Groups Projects
Commit 7329dc65 authored by sauves's avatar sauves
Browse files

added tpm script

parent 3486e519
No related branches found
No related tags found
No related merge requests found
import numpy as np
import pandas as pd
import os
import argparse
import pdb
def main(argv=None):
    """Parse the command line and load the tables used for normalisation.

    Reads the gene-ID conversion table, renames its nine columns, keeps only
    the rows whose ``tpt_type`` is one of the requested transcript types, and
    then loads the gene list and the gene expression matrix (all files are
    read as tab-separated).

    Args:
        argv: Optional list of command-line tokens. When ``None`` (the
            default) ``sys.argv[1:]`` is parsed, as argparse does natively.

    Raises:
        SystemExit: If ``-GE`` or ``-GL`` is not supplied (exit status 2,
            reported through ``parser.error``).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-GE', dest='GE_FILE', default=None, type=str,
        help='name of Gene Expression matrix file in RAW counts')
    parser.add_argument(
        '-GL', dest='GL_FILE', default=None, type=str,
        help='name of Gene LIST file '
             '(genes must appear in same order than in the GE matrix file.)')
    parser.add_argument(
        '-n', dest='NORMALISATION_PRTCL', default='TPM', type=str,
        help='type of normalisation used')
    # '-conrsion_file' was the original (misspelled) flag; it stays as an
    # alias so existing command lines keep working.
    parser.add_argument(
        '-conversion_file', '-conrsion_file', dest='CONVERSION_FILE',
        default='~/leucegene/E19/tables/CONVERSIONS/gene_id_conversions.txt',
        help='name of file used to retrieve the following gene info: '
             '(1) "ensembl ID" to "gene symbol" or gene_name '
             '(2) transcript type ie. "protein_coding" "pseudogene" etc. '
             '(3) transcript length including all UTRs')
    parser.add_argument(
        '-transcript_type', dest='TPT_TYPE', default=['protein_coding'],
        type=str, nargs='+',
        help='transcript types used for gene prefiltering')
    args = parser.parse_args(argv)

    # Fail fast with a usage message instead of letting pandas raise on a
    # ``None`` path after the conversion table has already been read.
    if args.GE_FILE is None or args.GL_FILE is None:
        parser.error('both -GE and -GL must be provided')

    CONVERSION = pd.read_csv(args.CONVERSION_FILE, sep='\t')
    # Positional rename: the file must have exactly these nine columns.
    CONVERSION.columns = [
        'tpt_type', 'gene_name', 'strand', 'length', 'gencode_anno',
        'go_domain', 'go_acc', 'chr', 'ensblID',
    ]
    # Keep only the genes whose transcript type was requested.
    wanted = CONVERSION['tpt_type'].isin(args.TPT_TYPE)
    KEEP = CONVERSION[wanted][['gene_name', 'ensblID', 'tpt_type', 'length']]

    # NOTE(review): KEEP, GL and GE are loaded but not consumed in the code
    # visible here; the normalisation step itself is not implemented yet.
    GL = pd.read_csv(args.GL_FILE, sep='\t')
    GE = pd.read_csv(args.GE_FILE, sep='\t')
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment