diff --git a/pybis/src/python/pybis/pybis.py b/pybis/src/python/pybis/pybis.py index c6aa9b38588bc53aa3fc7d51f6a1be02aa5769fd..468969b282a705deeeb558ee5816db4ad88ce4b4 100644 --- a/pybis/src/python/pybis/pybis.py +++ b/pybis/src/python/pybis/pybis.py @@ -86,6 +86,8 @@ import errno import requests import urllib3 +import logging +import sys # import the various openBIS entities @@ -101,6 +103,8 @@ LOG_DEBUG = 7 DEBUG_LEVEL = LOG_NONE +def now(): + return time.time_ns() // 1000000 def get_search_type_for_entity(entity, operator=None): """Returns a dictionary containing the correct search criteria type @@ -2034,6 +2038,7 @@ class Openbis: attrs=None, props=None, where=None, + thingsFetchOptions={"withDataFrame": True, "withObjects": True}, **properties, ): """Returns a DataFrame of all samples for a given space/project/experiment (or any combination) @@ -2068,6 +2073,10 @@ class Openbis: b) property is not defined for this sampleType """ + logger = logging.getLogger('get_samples') + logger.setLevel(logging.DEBUG) + logger.addHandler(logging.StreamHandler(sys.stdout)) + if collection is not None: experiment = collection @@ -2150,27 +2159,39 @@ class Openbis: fetchopts, ], } + + time1 = now() + logger.debug("get_samples posting request") resp = self._post_request(self.as_v3, request) - samples = [] + time2 = now() + + logger.debug(f"get_samples got response. Delay: {time2 - time1}") parse_jackson(resp) - for obj in resp["objects"]: - sample = Sample( - openbis_obj=self, - type=self.get_sample_type(obj["type"]["code"]), - data=obj, - ) - samples.append(sample) - return self._sample_list_for_response( - response=resp["objects"], - attrs=attrs, - props=props, - start_with=start_with, - count=count, - totalCount=resp["totalCount"], - objects=samples, - ) + time3 = now() + + response = resp["objects"] + # samples = range(len(objects)) + logger.debug(f"get_samples got JSON. Delay: {time3 - time2}") + + objects = None + if thingsFetchOptions["withObjects"]: + objects = list(map(lambda obj: Sample(openbis_obj=self, type=self.get_sample_type(obj["type"]["code"]), + data=obj), response)) + + time4 = now() + + logger.debug(f"get_samples after result mapping. Delay: {time4 - time3}") + + result = self._sample_list_for_response(response=response, thingsFetchOptions=thingsFetchOptions, + attrs=attrs, props=props, start_with=start_with, count=count, + totalCount=resp["totalCount"], objects=objects, parsed=True) + + time5 = now() + + logger.debug(f"get_samples computed final result. Delay: {time5 - time4}") + return result get_objects = get_samples # Alias @@ -2534,7 +2555,6 @@ class Openbis: ) fetchopts["kind"] = kind raise NotImplementedError("you cannot search for dataSet kinds yet") - request = { "method": "searchDataSets", "params": [ @@ -2749,7 +2769,11 @@ class Openbis: if only_data: return resp[projectId] - project = Project(openbis_obj=self, type=None, data=resp[projectId]) + project = Project( + openbis_obj=self, + type=None, + data=resp[projectId] + ) if self.use_cache: self._object_cache(entity="project", code=projectId, value=project) return project @@ -2771,7 +2795,11 @@ class Openbis: if only_data: return resp["objects"][0] - project = Project(openbis_obj=self, type=None, data=resp["objects"][0]) + project = Project( + openbis_obj=self, + type=None, + data=resp["objects"][0] + ) if self.use_cache: self._object_cache(entity="project", code=projectId, value=project) return project @@ -3330,7 +3358,6 @@ class Openbis: "code" ] obj["creationDate"] = format_timestamp(obj["creationDate"]) - return objects def get_semantic_annotations(self): @@ -3466,7 +3493,6 @@ class Openbis: def new_plugin(self, name, pluginType, **kwargs): """Creates a new Plugin in openBIS. - name -- name of the plugin description -- pluginType -- DYNAMIC_PROPERTY, MANAGED_PROPERTY, ENTITY_VALIDATION @@ -4195,7 +4221,13 @@ class Openbis: ) def get_sample( - self, sample_ident, only_data=False, withAttachments=False, props=None, **kvals + self, + sample_ident, + only_data=False, + withAttachments=False, + props=None, + thingsFetchOptions={"withDataFrame": True, "withObjects": True}, + **kvals ): """Retrieve metadata for the sample. Get metadata for the sample and any directly connected parents of the sample to allow access @@ -4258,9 +4290,149 @@ class Openbis: else: return self._sample_list_for_response( response=list(resp.values()), + thingsFetchOptions=thingsFetchOptions, props=props, ) + def _sample_list_for_response( + self, + response, + thingsFetchOptions, + attrs=None, + props=None, + start_with=None, + count=None, + totalCount=0, + objects=None, + parsed=False, + ): + """returns a Things object, containing a DataFrame plus additional information""" + + def extract_attribute(attribute_to_extract): + def return_attribute(obj): + if obj is None: + return "" + return obj.get(attribute_to_extract, "") + + return return_attribute + + logger = logging.getLogger('_sample_list_for_response') + logger.setLevel(logging.DEBUG) + logger.addHandler(logging.StreamHandler(sys.stdout)) + + time1 = now() + + logger.debug("_sample_list_for_response before parsing JSON") + if not parsed: + parse_jackson(response) + + time2 = now() + + logger.debug(f"_sample_list_for_response got response. Delay: {time2 - time1}") + + dataFrame = None + if thingsFetchOptions["withDataFrame"]: + if attrs is None: + attrs = [] + default_attrs = [ + "identifier", + "permId", + "type", + "registrator", + "registrationDate", + "modifier", + "modificationDate", + ] + display_attrs = default_attrs + attrs + + if props is None: + props = [] + else: + if isinstance(props, str): + props = [props] + + if len(response) == 0: + for prop in props: + if prop == "*": + continue + display_attrs.append(prop) + samples = DataFrame(columns=display_attrs) + else: + time3 = now() + logger.debug(f"_sample_list_for_response computing attributes. Delay: {time3 - time2}") + + samples = DataFrame(response) + for attr in attrs: + if "." in attr: + entity, attribute_to_extract = attr.split(".") + samples[attr] = samples[entity].map( + extract_attribute(attribute_to_extract) + ) + # if no dot supplied, just display the code of the space, project or experiment + elif attr in ["project", "experiment"]: + samples[attr] = samples[attr].map(extract_nested_identifier) + elif attr in ["space"]: + samples[attr] = samples[attr].map(extract_code) + + samples["registrationDate"] = samples["registrationDate"].map( + format_timestamp + ) + samples["modificationDate"] = samples["modificationDate"].map( + format_timestamp + ) + samples["registrator"] = samples["registrator"].map(extract_person) + samples["modifier"] = samples["modifier"].map(extract_person) + samples["identifier"] = samples["identifier"].map(extract_identifier) + samples["container"] = samples["container"].map(extract_nested_identifier) + for column in ["parents", "children", "components"]: + if column in samples: + samples[column] = samples[column].map(extract_identifiers) + samples["permId"] = samples["permId"].map(extract_permid) + samples["type"] = samples["type"].map(extract_nested_permid) + + time4 = now() + logger.debug(f"_sample_list_for_response computed attributes. Delay: {time4 - time3}") + + for prop in props: + if prop == "*": + # include all properties in dataFrame. + # expand the dataFrame by adding new columns + columns = [] + for i, sample in enumerate(response): + for prop_name, val in sample.get("properties", {}).items(): + samples.loc[i, prop_name.upper()] = val + columns.append(prop_name.upper()) + + display_attrs += set(columns) + continue + else: + # property name is provided + for i, sample in enumerate(response): + if "properties" in sample: + properties = sample["properties"] + val = properties.get(prop, "") or properties.get(prop.upper(), "") + samples.loc[i, prop.upper()] = val + else: + samples.loc[i, prop.upper()] = "" + display_attrs.append(prop.upper()) + + time5 = now() + logger.debug(f"_sample_list_for_response computed properties. Delay: {time5 - time4}") + + dataFrame = samples[display_attrs] + + time6 = now() + logger.debug("_sample_list_for_response computing result.") + + result = Things(openbis_obj=self, entity="sample", + df=dataFrame, + identifier_name="identifier", start_with=start_with, + count=count, totalCount=totalCount, objects=objects, response=response) + + time7 = now() + logger.debug(f"_sample_list_for_response computed result. Delay: {time7 - time6}") + return result + @staticmethod def decode_attribute(entity, attribute): params = {} @@ -4328,118 +4500,6 @@ class Openbis: return params - def _sample_list_for_response( - self, - response, - attrs=None, - props=None, - start_with=None, - count=None, - totalCount=0, - objects=None, - ): - """returns a Things object, containing a DataFrame plus additional information""" - - def extract_attribute(attribute_to_extract): - def return_attribute(obj): - if obj is None: - return "" - return obj.get(attribute_to_extract, "") - - return return_attribute - - parse_jackson(response) - - if attrs is None: - attrs = [] - default_attrs = [ - "identifier", - "permId", - "type", - "registrator", - "registrationDate", - "modifier", - "modificationDate", - ] - display_attrs = default_attrs + attrs - - if props is None: - props = [] - else: - if isinstance(props, str): - props = [props] - - if len(response) == 0: - for prop in props: - if prop == "*": - continue - display_attrs.append(prop) - samples = DataFrame(columns=display_attrs) - else: - samples = DataFrame(response) - for attr in attrs: - if "." in attr: - entity, attribute_to_extract = attr.split(".") - samples[attr] = samples[entity].map( - extract_attribute(attribute_to_extract) - ) - - for attr in attrs: - # if no dot supplied, just display the code of the space, project or experiment - if attr in ["project", "experiment"]: - samples[attr] = samples[attr].map(extract_nested_identifier) - if attr in ["space"]: - samples[attr] = samples[attr].map(extract_code) - - samples["registrationDate"] = samples["registrationDate"].map( - format_timestamp - ) - samples["modificationDate"] = samples["modificationDate"].map( - format_timestamp - ) - samples["registrator"] = samples["registrator"].map(extract_person) - samples["modifier"] = samples["modifier"].map(extract_person) - samples["identifier"] = samples["identifier"].map(extract_identifier) - samples["container"] = samples["container"].map(extract_nested_identifier) - for column in ["parents", "children", "components"]: - if column in samples: - samples[column] = samples[column].map(extract_identifiers) - samples["permId"] = samples["permId"].map(extract_permid) - samples["type"] = samples["type"].map(extract_nested_permid) - - for prop in props: - if prop == "*": - # include all properties in dataFrame. - # expand the dataFrame by adding new columns - columns = [] - for i, sample in enumerate(response): - for prop_name, val in sample.get("properties", {}).items(): - samples.loc[i, prop_name.upper()] = val - columns.append(prop_name.upper()) - - display_attrs += set(columns) - continue - - else: - # property name is provided - for i, sample in enumerate(response): - val = sample.get("properties", {}).get(prop, "") or sample.get( - "properties", {} - ).get(prop.upper(), "") - samples.loc[i, prop.upper()] = val - display_attrs.append(prop.upper()) - - return Things( - openbis_obj=self, - entity="sample", - df=samples[display_attrs], - identifier_name="identifier", - start_with=start_with, - count=count, - totalCount=totalCount, - objects=objects, - ) - get_object = get_sample # Alias def get_external_data_management_systems( diff --git a/pybis/src/python/pybis/sample.py b/pybis/src/python/pybis/sample.py index 1bedb35909553a60ecbf577fb831527767011ad1..7ae6c8ab428e84fed135520ee5ca04230a8738a2 100644 --- a/pybis/src/python/pybis/sample.py +++ b/pybis/src/python/pybis/sample.py @@ -1,6 +1,6 @@ from .property import PropertyHolder from .attribute import AttrHolder -from .openbis_object import OpenBisObject +from .openbis_object import OpenBisObject from .definitions import openbis_definitions from .utils import VERBOSE @@ -23,11 +23,13 @@ class Sample( if data is not None: self._set_data(data) + # TODO: Why are we using getattr() and setattr() here? They are considerably slower. if project is not None: - setattr(self, 'project', project) + self.project = project if props is not None: for key in props: + # self.p[key] = props[key] setattr(self.p, key, props[key]) if kwargs is not None: @@ -36,10 +38,10 @@ class Sample( if 'experiment' in kwargs: try: - experiment = getattr(self, 'experiment') + experiment = self.experiment if not 'space' in kwargs: project = experiment.project - setattr(self.a, 'space', project.space) + self.a.space = project.space except Exception: pass @@ -77,10 +79,10 @@ class Sample( return [ 'type', 'get_parents()', 'get_children()', 'get_components()', - 'add_parents()', 'add_children()', 'add_components()', + 'add_parents()', 'add_children()', 'add_components()', 'del_parents()', 'del_children()', 'del_components()', 'set_parents()', 'set_children()', 'set_components()', - 'get_datasets()', + 'get_datasets()', 'space', 'project', 'experiment', 'container', 'tags', 'set_tags()', 'add_tags()', 'del_tags()', 'add_attachment()', 'get_attachments()', 'download_attachments()', diff --git a/pybis/src/python/pybis/things.py b/pybis/src/python/pybis/things.py index 51497d3a6c97089031a84696370e556aebbb2212..166a330191d69df69b77e3ee9b4d7c43ec3d6cc7 100644 --- a/pybis/src/python/pybis/things.py +++ b/pybis/src/python/pybis/things.py @@ -16,18 +16,19 @@ class Things(): Because the order of the elements cannot be ensured, you should choose the identifier instead: openbis.get_samples()['/SOME_SPACE/SAMPLE_CODE'] - Of course, if you know the identifier already, you would rather do: + Of course, if you know the identifier already, you would rather do: openbis.get_sample('/SOME_SPACE/SAMPLE_CODE') - - + + """ def __init__( self, openbis_obj, entity, df, - identifier_name='code', additional_identifier=None, + identifier_name='code', additional_identifier=None, start_with=None, count=None, totalCount=None, single_item_method=None, - objects=None + objects=None, + response=None ): self.openbis = openbis_obj self.entity = entity @@ -39,6 +40,7 @@ class Things(): self.totalCount=totalCount self.single_item_method=single_item_method self.objects=objects + self.response = response def __repr__(self): return tabulate(self.df, headers=list(self.df)) diff --git a/pybis/src/python/pybis/utils.py b/pybis/src/python/pybis/utils.py index ad917f5e46e7507fce048517d3169dd1685fa548..6ebd6cf72451a6f9f34424bf3c813c60b98a58c0 100644 --- a/pybis/src/python/pybis/utils.py +++ b/pybis/src/python/pybis/utils.py @@ -16,14 +16,14 @@ def parse_jackson(input_json): Any further findings only carry this reference id. This function is used to dereference the output. """ - interesting=['tags', 'registrator', 'modifier', 'owner', 'type', - 'parents', 'children', 'containers', # 'container', + interesting=['tags', 'registrator', 'modifier', 'owner', 'type', + 'parents', 'children', 'containers', # 'container', 'properties', 'experiment', 'sample', 'project', 'space', 'propertyType', 'entityType', 'propertyType', 'propertyAssignment', 'externalDms', 'roleAssignments', 'user', 'users', 'authorizationGroup', 'vocabulary', 'validationPlugin', 'dataSetPermId', 'dataStore' ] - found = {} + found = {} def build_cache(graph): if isinstance(graph, list): for item in graph: @@ -45,15 +45,17 @@ def parse_jackson(input_json): build_cache(value) elif isinstance(value, list): build_cache(value) - - def deref_graph(graph): + + def deref_graph(graph): if isinstance(graph, list): for i, list_item in enumerate(graph): if isinstance(list_item, int): - try: - graph[i] = found[list_item] - except KeyError: - pass + # try: # TODO: use "if list_item in found" with found.get() + # graph[i] = found[list_item] + # except KeyError: + # pass + if list_item in found: + graph[i] = found.get(list_item) else: deref_graph(list_item) elif isinstance(graph, dict) and len(graph) > 0: @@ -63,7 +65,6 @@ def parse_jackson(input_json): deref_graph(value) elif isinstance(value, int): graph[key] = found.get(value) - elif isinstance(value, list): for i, list_item in enumerate(value): if isinstance(list_item, int): @@ -189,9 +190,9 @@ def extract_identifiers(items): return [] try: return [ - data['identifier']['identifier'] - if 'identifier' in data - else data['permId']['permId'] + data['identifier']['identifier'] + if 'identifier' in data + else data['permId']['permId'] for data in items ] except TypeError: @@ -206,8 +207,8 @@ def extract_nested_identifier(ident): def extract_nested_permid(permid): if not isinstance(permid, dict): return '' if permid is None else str(permid) - return '' if permid['permId']['permId'] is None else permid['permId']['permId'] - + return '' if permid['permId']['permId'] is None else permid['permId']['permId'] + def extract_nested_permids(items): if not isinstance(items, list): return [] diff --git a/pybis/src/python/tests/conftest.py b/pybis/src/python/tests/conftest.py index 4bddb2c2294b6141ef0329da15c6a1c21e8186d8..edeaf72291fbd921c1a49128438e1929451636e6 100644 --- a/pybis/src/python/tests/conftest.py +++ b/pybis/src/python/tests/conftest.py @@ -4,12 +4,10 @@ import time import pytest from pybis import Openbis - openbis_url = "https://localhost:8443" admin_username = "admin" admin_password = "changeit" - @pytest.fixture(scope="module") def openbis_instance(): instance = Openbis( @@ -28,6 +26,7 @@ def other_openbis_instance(): instance = Openbis( url=openbis_url, verify_certificates=False, + ) print("\nLOGGING IN...") instance.login(admin_username, admin_password) @@ -41,6 +40,7 @@ def space(): o = Openbis( url=openbis_url, verify_certificates=False, + ) o.login(admin_username, admin_password)