Albert Feghaly · d3bccf3e · 9b9bdfde · fd0e53d0 · 2a4c0057 · 3dd40338
--- a/pyiricdata/Client.py

+ 22

− 16
+++ b/pyiricdata/Client.py

+ 22

− 16
 @@ -10,7 +10,8 @@ from bs4 import BeautifulSoup
 import sys
 from collections import namedtuple

-from .tools import is_json
+from pyiricdata.tools import is_json
+from pyiricdata.exceptions import IricDataConnectionError


 IDF = namedtuple('IricDataFile', ['metadata', 'data', 'annotations'])
 @@ -41,13 +42,11 @@ class Client:

        if login.status_code == 200:
            if any(x in login.text for x in ['Erreur', 'Error']):
-                sys.stderr.write('ERROR: Connexion failed -- verify your username and password\n')
-                sys.exit(1)
+                raise IricDataConnectionError('Connexion failed -- verify your username and password')
            else:
                sys.stdout.write('Your connexion to IRIC-Data has been established [user=%s]\n' % self.user)
        else:
-            sys.stderr.write('ERROR: Could not initiate connexion with IRIC-Data\n')
-            sys.exit(1)
+            raise IricDataConnectionError('Could not initiate connexion with IRIC-Data')

        self.session = session

 @@ -144,9 +143,11 @@ class Client:
                )
            ).json()

-        df = pd.DataFrame(r['data'])
+        df = pd.DataFrame(r['data']).rename({'id': 'numerical_id'}, axis=1)  # id is internal to iric-data
        df.index = df.iric_data_id
        df.index.name = 'ID'
+        ordering = ['filename', 'numerical_id', 'hash']
+        df = df[ordering + [x for x in df.columns if x not in ordering]]

        return(df)

 @@ -186,13 +187,20 @@ class Client:
                         "get_datafiles_list(dataset_id=dataset_id)\n")
        return(self.get_datafiles_list(dataset_id=dataset_id))

-    """ Get DatasetId by name"""
-    def get_dataset_id_by_name(self, name):
-        return self.datasets.loc[self.datasets.dataset_name==name,'dataset_slug'][0]
+    """ Get a subset of the available datasets for which there is a match """
+    def filter_datasets(self, term, exact_match=False):
+        # Return the rows of self.datasets whose dataset_name matches term.
+        # term is handed to pandas str.fullmatch / str.contains, which treat it as a regular expression by default.
+        if exact_match:
+            # the whole dataset_name must match the pattern
+            return self.datasets[self.datasets.dataset_name.str.fullmatch(term)]
+        else:
+            # the pattern may match anywhere inside dataset_name
+            return self.datasets[self.datasets.dataset_name.str.contains(term)]

-    """ Get a subset of the available datasets for which name match a given term """
-    def search_dataset_names(self, term):
-        return self.datasets.loc[self.datasets.dataset_name.str.contains(term),:]
+    def filter_datafiles(self, term, exact_match=False, **kwargs):
+        """ Get the subset of a datafiles list whose filename matches term.
+
+        term is passed to pandas str.fullmatch (exact_match=True) or
+        str.contains (default), both of which treat it as a regular expression.
+        kwargs are forwarded unchanged to get_datafiles_list.
+        """
+        df = self.get_datafiles_list(**kwargs)
+        # no print(df) here: a filter helper should not dump the whole listing to stdout
+        if exact_match:
+            return df[df.filename.str.fullmatch(term)]
+        else:
+            return df[df.filename.str.contains(term)]

    """ Get file content according to file_id """
    def get_file(self, file_id):
        try:
            file_metadata = self.get_file_metadata(file_id)
            file_content = self.get_file_data_conn(file_id).content
            file_annotation = self.get_file_annotation(file_id)
            return IDF(file_metadata, file_content, file_annotation)
        except TypeError:
 @@ -434,6 +442,6 @@
            if resp.status_code == 200:
                print('File update succesful on {}'.format(file_id))
            else:
-                sys.stderr.write('ERROR: something went wrong during datafiles update\n')
-                sys.exit(2)
+                raise IricDataConnectionError('Something went wrong during datafile update')
        else:
-            sys.stderr.write('FAILED: At least one error has occured, please fix them and try again.\n')
-            sys.exit(1)
+            raise IricDataConnectionError('At least one error has occured, please investigate and try again.')