Skip to content
Snippets Groups Projects

Resolve "Allow to keep file content in memory"

Merged Albert Feghaly requested to merge 15-allow-to-keep-file-content-in-memory into master
1 unresolved thread
Files
2
+ 37
30
@@ -10,7 +10,8 @@ from bs4 import BeautifulSoup
import sys
from collections import namedtuple
from .tools import is_json
from pyiricdata.tools import is_json
from pyiricdata.exceptions import IricDataConnectionError
IDF = namedtuple('IricDataFile', ['metadata', 'data', 'annotations'])
@@ -41,13 +42,11 @@ class Client:
if login.status_code == 200:
if any(x in login.text for x in ['Erreur', 'Error']):
sys.stderr.write('ERROR: Connexion failed -- verify your username and password\n')
sys.exit(1)
raise IricDataConnectionError('Connexion failed -- verify your username and password')
else:
sys.stdout.write('Your connexion to IRIC-Data has been established [user=%s]\n' % self.user)
else:
sys.stderr.write('ERROR: Could not initiate connexion with IRIC-Data\n')
sys.exit(1)
raise IricDataConnectionError('Could not initiate connexion with IRIC-Data')
self.session = session
@@ -144,9 +143,11 @@ class Client:
)
).json()
df = pd.DataFrame(r['data'])
df = pd.DataFrame(r['data']).rename({'id': 'numerical_id'}, axis=1) # id is internal to iric-data
df.index = df.iric_data_id
df.index.name = 'ID'
ordering = ['filename', 'numerical_id', 'hash']
df = df[ordering + [x for x in df.columns if x not in ordering]]
return(df)
@@ -170,8 +171,8 @@ class Client:
annotation = None
return annotation
""" Return a handle of the contents of a file for a given file_id """
def get_file_content_handle(self, file_id):
""" Return a connector of the contents of a file for a given file_id """
def get_file_data_conn(self, file_id):
path = os.path.join(self.url, 'secure/datafiles/download', str(file_id))
try:
content = self.session.get(path, allow_redirects=True)
@@ -186,19 +187,26 @@ class Client:
"get_datafiles_list(dataset_id=dataset_id)\n")
return(self.get_datafiles_list(dataset_id=dataset_id))
""" Get DatasetId by name"""
def get_dataset_id_by_name(self, name):
return self.datasets.loc[self.datasets.dataset_name==name,'dataset_slug'][0]
""" Get a subset of the available datasets for which there is a match """
def filter_datasets(self, term, exact_match=False):
if exact_match:
return self.datasets[self.datasets.dataset_name.str.fullmatch(term)]
else:
return self.datasets[self.datasets.dataset_name.str.contains(term)]
""" Get a subset of the available datasets for which name match a given term """
def search_dataset_names(self, term):
return self.datasets.loc[self.datasets.dataset_name.str.contains(term),:]
def filter_datafiles(self, term, exact_match=False, **kwargs): # kwargs refer to get_datafiles_list arguments
df = self.get_datafiles_list(**kwargs)
print(df)
if exact_match:
return df[df.filename.str.fullmatch(term)]
else:
return df[df.filename.str.contains(term)]
""" Get file content according to file_id """
def get_file(self, file_id):
try:
file_metadata = self.get_file_metadata(file_id)
file_content = self.get_file_content_handle(file_id).content
file_content = self.get_file_data_conn(file_id).content
file_annotation = self.get_file_annotation(file_id)
return IDF(file_metadata, file_content, file_annotation)
except TypeError:
@@ -206,0+214,0 @@
""" Download file according to file_id """
def dwnl_file_content(self, file_id, folder_out=None, filename=None):
idf = self.get_file(file_id)
if idf.data is not None:
file_meta = self.get_file_metadata(file_id)
if file_meta is not None:
if folder_out:
if folder_out[0] != '/':
folder_out = os.path.join(os.getcwd(), folder_out)
else:
folder_out = os.getcwd()
filename = idf.metadata['filename'] if filename is None else filename
file_conn = self.get_file_data_conn(file_id)
filename = file_meta['filename'] if filename is None else filename
out_file_path = os.path.join(folder_out, filename)
if os.path.exists(out_file_path):
sys.stderr.write('Warning: File already exists at location %s, skipping.\n' % out_file_path)
@@ -221,24 +230,24 @@ class Client:
os.makedirs(folder_out, exist_ok=True)
with open(out_file_path, 'wb') as outfile:
print('Downloading %s' % out_file_path)
outfile.write(idf.data)
outfile.write(file_conn.content)
""" Write file annotations json to disk """
def dwnl_file_annotation(self, file_id, folder_out=None, filename=None):
if folder_out:
if folder_out[0] != '/':
folder_out = os.path.join(os.getcwd(), folder_out)
else:
folder_out = os.getcwd()
os.makedirs(folder_out, exist_ok=True)
file_meta = self.get_file_metadata(file_id)
if not file_meta is None:
if file_meta is not None:
if folder_out:
if folder_out[0] != '/':
folder_out = os.path.join(os.getcwd(), folder_out)
else:
folder_out = os.getcwd()
annotations = self.get_file_annotation(file_id)
filename = file_meta['filename'] if filename is None else filename
out_file_path = os.path.join(folder_out, filename + '.json')
if os.path.exists(out_file_path):
sys.stderr.write('Warning: File already exists at location %s, skipping.\n' % out_file_path)
else:
os.makedirs(folder_out, exist_ok=True)
with open(out_file_path, 'w') as outfile:
json.dump(annotations, outfile)
@@ -433,8 +442,6 @@ class Client:
if resp.status_code == 200:
print('File update successful on {}'.format(file_id))
else:
sys.stderr.write('ERROR: something went wrong during datafiles update\n')
sys.exit(2)
raise IricDataConnectionError('Something went wrong during datafile update')
else:
sys.stderr.write('FAILED: At least one error has occurred, please fix them and try again.\n')
sys.exit(1)
raise IricDataConnectionError('At least one error has occurred, please investigate and try again.')
Loading