diff --git a/pyiricdata/Client.py b/pyiricdata/Client.py
index 46ecd1afa77e291d657f4fb49ca2dfa8b324625e..ec744cf1b51307bfa65a333dc63231a7dc839d0d 100755
--- a/pyiricdata/Client.py
+++ b/pyiricdata/Client.py
@@ -8,11 +8,15 @@
 import numpy as np
 import json
 from bs4 import BeautifulSoup
 import sys
+from collections import namedtuple
 
 from pyiricdata.tools import is_json
 from pyiricdata.exceptions import IricDataConnectionError
 
+IDF = namedtuple('IricDataFile', ['metadata', 'data', 'annotations'])
+
+
 class Client:
     """ python client to IRICdata API"""
     def __init__(self, username, password=None, url='https://thepond.bioinfo.iric.ca'):
@@ -167,6 +171,16 @@ class Client:
             annotation = None
         return annotation
 
+    """ Return a connector of the contents of a file for a given file_id """
+    def get_file_data_conn(self, file_id):
+        path = os.path.join(self.url, 'secure/datafiles/download', str(file_id))
+        try:
+            content = self.session.get(path, allow_redirects=True)
+        except:
+            sys.stderr.write('ERROR: File %s does not exist in database\n' % str(file_id))
+            content = None
+        return content
+
     """ Return DataFrame (file_name, file_id, file_slug, file_hash) for files in a given dataset """
     def get_dataset_filelist(self, dataset_id):
         sys.stderr.write("DEPRECATED: Please use " +
@@ -188,42 +202,59 @@ class Client:
         else:
             return df[df.filename.str.contains(term)]
 
+    """ Get file content according to file_id """
+    def get_file(self, file_id):
+        try:
+            file_metadata = self.get_file_metadata(file_id)
+            file_content = self.get_file_data_conn(file_id).content
+            file_annotation = self.get_file_annotation(file_id)
+            return IDF(file_metadata, file_content, file_annotation)
+        except TypeError:
+            return IDF(None, None, None)
+
     """ Download file according to file_id """
-    def dwnl_file_content(self, file_id, folder_out='', filename=''):
-        folder_out = folder_out if folder_out and folder_out[0] == '/' else os.path.join(os.getcwd(), folder_out)
-        os.makedirs(folder_out, exist_ok=True)
-        file_object = self.get_file_metadata(file_id)
-        if not file_object is None:
-            filename = file_object['filename'] if not filename else filename
+    def dwnl_file_content(self, file_id, folder_out=None, filename=None):
+        file_meta = self.get_file_metadata(file_id)
+        if file_meta is not None:
+            if folder_out:
+                if folder_out[0] != '/':
+                    folder_out = os.path.join(os.getcwd(), folder_out)
+            else:
+                folder_out = os.getcwd()
+            file_conn = self.get_file_data_conn(file_id)
+            filename = file_meta['filename'] if filename is None else filename
             out_file_path = os.path.join(folder_out, filename)
-            path = os.path.join(self.url, 'secure/datafiles/download', str(file_id))
-            r = self.session.get(path, allow_redirects=True)
             if os.path.exists(out_file_path):
                 sys.stderr.write('Warning: File already exists at location %s, skipping.\n' % out_file_path)
             else:
+                os.makedirs(folder_out, exist_ok=True)
                 with open(out_file_path, 'wb') as outfile:
                     print('Downloading %s' % out_file_path)
-                    outfile.write(r.content)
+                    outfile.write(file_conn.content)
 
     """ Write file annotations json to disk """
-    def dwnl_file_annotation(self, file_id, folder_out='', filename=''):
-        folder_out = folder_out if folder_out and folder_out[0] == '/' else os.path.join(os.getcwd(), folder_out)
-        os.makedirs(folder_out, exist_ok=True)
+    def dwnl_file_annotation(self, file_id, folder_out=None, filename=None):
         file_meta = self.get_file_metadata(file_id)
-        if not file_meta is None:
+        if file_meta is not None:
+            if folder_out:
+                if folder_out[0] != '/':
+                    folder_out = os.path.join(os.getcwd(), folder_out)
+            else:
+                folder_out = os.getcwd()
             annotations = self.get_file_annotation(file_id)
-            filename = file_meta['filename'] if not filename else filename
+            filename = file_meta['filename'] if filename is None else filename
            out_file_path = os.path.join(folder_out, filename + '.json')
             if os.path.exists(out_file_path):
                 sys.stderr.write('Warning: File already exists at location %s, skipping.\n' % out_file_path)
             else:
+                os.makedirs(folder_out, exist_ok=True)
                 with open(out_file_path, 'w') as outfile:
                     json.dump(annotations, outfile)
 
     """ Download an entire dataset """
-    def dwnl_dataset(self, dataset_id, folder_out='', datasetname=''):
+    def dwnl_dataset(self, dataset_id, folder_out=None, datasetname=None):
         dataset = self.get_dataset_filelist(dataset_id)
-        datasetname = self.datasets.loc[dataset_id].dataset_name if not datasetname else datasetname
+        datasetname = self.datasets.loc[dataset_id].dataset_name if datasetname is None else datasetname
         for file_id in np.unique(dataset.file_id):
             self.dwnl_file_content(file_id, os.path.join(folder_out, datasetname))
             self.dwnl_file_annotation(file_id, os.path.join(folder_out, datasetname))
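A minimal usage sketch of the API after this change, assuming the module path pyiricdata.Client shown in the diff header; the credentials, file_id, and dataset_id below are placeholders, not values from the patch.

# Hedged usage sketch: placeholder account and ids.
from pyiricdata.Client import Client

client = Client('my_username', password='my_password')  # hypothetical account

# get_file() returns IDF(metadata, data, annotations); when a lookup fails it
# returns IDF(None, None, None) via the TypeError branch.
f = client.get_file(1234)  # 1234 is a hypothetical file_id
if f.metadata is not None:
    print(f.metadata['filename'], len(f.data))  # f.data holds the raw downloaded bytes
    print(f.annotations)                        # annotations JSON (or None)

# Download every file of a dataset plus its annotations; pass folder_out
# explicitly, since dwnl_dataset() joins it with the dataset name via os.path.join.
client.dwnl_dataset(5678, folder_out='downloads')  # 5678 is a hypothetical dataset_id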