From 0359203c2f091df3e2b08d52b29c2e504b1c6c51 Mon Sep 17 00:00:00 2001 From: alaskowski <alaskowski@ethz.ch> Date: Mon, 24 Jul 2023 16:35:08 +0200 Subject: [PATCH] SSDM-13689: fixes to object-search, performance improvements, OBIS 0.4.2rc7 --- .../src/python/CHANGELOG.md | 2 +- .../src/python/obis/__init__.py | 2 +- .../src/python/obis/dm/commands/search.py | 98 ++++++++++++------- .../src/python/obis/scripts/cli.py | 2 +- app-openbis-command-line/src/python/setup.py | 4 +- 5 files changed, 68 insertions(+), 40 deletions(-) diff --git a/app-openbis-command-line/src/python/CHANGELOG.md b/app-openbis-command-line/src/python/CHANGELOG.md index 461d6c1c0ac..fd5eb265bfa 100644 --- a/app-openbis-command-line/src/python/CHANGELOG.md +++ b/app-openbis-command-line/src/python/CHANGELOG.md @@ -4,7 +4,7 @@ * Added recursive search to object and data_set search commands * Updated documentation regarding authentication * Added dataset ids to sample search results -* changed pybis dependency to version == 1.35.11 +* changed pybis dependency to version == 1.36.0 # New in version 0.4.1 diff --git a/app-openbis-command-line/src/python/obis/__init__.py b/app-openbis-command-line/src/python/obis/__init__.py index bba01c84213..6d545ea634c 100644 --- a/app-openbis-command-line/src/python/obis/__init__.py +++ b/app-openbis-command-line/src/python/obis/__init__.py @@ -14,6 +14,6 @@ # __author__ = "ID SIS • ETH Zürich" __email__ = "openbis-support@id.ethz.ch" -__version__ = "0.4.2rc6" +__version__ = "0.4.2rc7" from .dm import * diff --git a/app-openbis-command-line/src/python/obis/dm/commands/search.py b/app-openbis-command-line/src/python/obis/dm/commands/search.py index 9c500c8df46..3198164bd16 100644 --- a/app-openbis-command-line/src/python/obis/dm/commands/search.py +++ b/app-openbis-command-line/src/python/obis/dm/commands/search.py @@ -18,7 +18,6 @@ import concurrent.futures import pandas as pd from pybis.property_reformatter import is_of_openbis_supported_date_format -from pybis.sample import Sample from .openbis_command import OpenbisCommand from ..command_result import CommandResult from ..utils import cd @@ -27,11 +26,11 @@ from ...scripts.click_util import click_echo def _dfs(objects, prop, func, func_specific): """Helper function that perform DFS search over children graph of objects""" + # TODO: improve performance of this - make it similar to _dfs_samples with concurrent.futures.ThreadPoolExecutor( max_workers=5) as pool_simple, concurrent.futures.ThreadPoolExecutor( max_workers=20) as pool_full: - stack = [getattr(openbis_obj, prop) for openbis_obj in - objects] # datasets and samples provide children in different formats + stack = [openbis_obj[prop] for openbis_obj in objects] # datasets and samples provide children in different formats visited = set() stack.reverse() output = [] @@ -55,12 +54,26 @@ def _dfs(objects, prop, func, func_specific): return output -def _get_datasets_of_samples(get_dataset_method, samples): - output = [] - with concurrent.futures.ThreadPoolExecutor( - max_workers=5) as pool_simple: - output = pool_simple.map(get_dataset_method, samples) - +def _dfs_samples(data_base, prop, func): + """Helper function that perform DFS search over children graph of objects""" + output = data_base + ids = [x['children'] for x in data_base if x['children']] + ids = [x[prop][prop] for x in flatten(ids)] + visited = set([x[prop][prop] for x in data_base]) + while ids: + data = func(ids) + data = list(data.values()) + output += data + ids = [] + children = [] + for obj in data: + key = obj[prop][prop] + children += [x[prop][prop] for x in obj['children']] + if key not in visited: + visited.add(key) + for child in children: + if child not in visited: + ids += [child] return output @@ -145,7 +158,13 @@ class Search(OpenbisCommand): super(Search, self).__init__(dm) def search_samples(self): - search_results = self._search_samples() + search_results = self._search_samples(raw_response=True) + + search_results = self.openbis._sample_list_for_response(props=self.props, + response=search_results, + attrs=["parents", "children", + "dataSets"], + parsed=True) click_echo(f"Objects found: {len(search_results)}") if self.save_path is not None: @@ -163,30 +182,35 @@ class Search(OpenbisCommand): def _get_sample_with_datasets(self, identifier): return self.openbis.get_sample(identifier, withDataSetIds=True) - def _search_samples(self): + def _get_sample_with_datasets2(self, identifier): + return self.openbis.get_sample(identifier, withDataSetIds=True, raw_response=True) + + def _search_samples(self, raw_response=False): """Helper method to search samples""" + if self.recursive: + raw_response = True + if "object_code" in self.filters: results = self.openbis.get_samples(identifier=self.filters['object_code'], attrs=["parents", "children", "dataSets"], + raw_response=raw_response, props=self.props) else: args = self._get_filtering_args(self.props, ["parents", "children", "dataSets"]) + args["raw_response"] = raw_response results = self.openbis.get_samples(**args) if self.recursive: click_echo(f"Recursive search enabled. It may take time to produce results.") - output = _dfs(results.objects, 'identifier', - self._get_samples_children, - self._get_sample_with_datasets) # samples provide identifiers as children - search_results = self.openbis._sample_list_for_response(props=self.props, - response=[sample.data for sample - in output], - attrs=["parents", "children", - "dataSets"], - parsed=True) + output2 = _dfs_samples(results['objects'], 'identifier', self._get_sample_with_datasets2) + + search_results = output2 else: - search_results = results + if raw_response: + search_results = results['objects'] + else: + search_results = results return search_results def _get_datasets_children(self, permId): @@ -203,22 +227,26 @@ class Search(OpenbisCommand): dataset_filters = {k: v for (k, v) in main_filters.items() if not k.startswith('object_')} if object_filters: if 'id' in object_filters: - object_filters['object_code'] = object_filters['id'] + if object_filters['id'] is not None: + object_filters['object_code'] = object_filters['id'] del object_filters['id'] self.filters = object_filters - search_results = self._search_samples() - datasets = [x for x in _get_datasets_of_samples(Sample.get_datasets, search_results) if - x.totalCount > 0] - for thing in datasets: - for obj in thing.objects: - if not _filter_dataset(obj, dataset_filters): - for i in range(len(thing.response)): - if thing.response[i]['permId']['permId'] == obj.permId: - del thing.response[i] - break - datasets = [x.response for x in datasets] + search_results = self._search_samples(raw_response=True) + click_echo(f"Samples found: {len(search_results)}") + + datasets = [x["dataSets"] for x in search_results] + datasets = flatten(datasets) + datasets = [x['permId']['permId'] for x in datasets] + datasets = self.openbis.get_dataset(permIds=datasets) + + filtered_datasets = [] + for dataset in datasets: + if _filter_dataset(dataset, dataset_filters): + filtered_datasets += [dataset] + datasets = self.openbis._dataset_list_for_response(props=self.props, - response=flatten(datasets), + response=[x.data for x in + filtered_datasets], parsed=True) else: if self.recursive: @@ -264,7 +292,7 @@ class Search(OpenbisCommand): args = dict(space=self.filters['space'], project=self.filters['project'], # Not Supported with Project Samples disabled - experiment=self.filters['experiment'], + experiment=self.filters['collection'], type=self.filters['type_code'], where=where, attrs=attrs, diff --git a/app-openbis-command-line/src/python/obis/scripts/cli.py b/app-openbis-command-line/src/python/obis/scripts/cli.py index 7cd7e41be8f..efd84086e5e 100644 --- a/app-openbis-command-line/src/python/obis/scripts/cli.py +++ b/app-openbis-command-line/src/python/obis/scripts/cli.py @@ -485,7 +485,7 @@ def object_search(ctx, type_code, space, project, collection, registration_date, filters = dict(object_code=object_id) else: filters = dict(type_code=type_code, space=space, - project=project, experiment=collection, property_code=property_code, + project=project, collection=collection, property_code=property_code, registration_date=registration_date, modification_date=modification_date, property_value=property_value) return ctx.obj['runner'].run("object_search", diff --git a/app-openbis-command-line/src/python/setup.py b/app-openbis-command-line/src/python/setup.py index 267cea437ff..993559aaf63 100644 --- a/app-openbis-command-line/src/python/setup.py +++ b/app-openbis-command-line/src/python/setup.py @@ -31,7 +31,7 @@ data_files = [ setup( name="obis", - version="0.4.2rc6", + version="0.4.2rc7", description="Local data management with assistance from OpenBIS.", long_description=long_description, long_description_content_type="text/markdown", @@ -42,7 +42,7 @@ setup( packages=["obis", "obis.dm", "obis.dm.commands", "obis.scripts"], data_files=data_files, package_data={"obis": ["dm/git-annex-attributes"]}, - install_requires=["pyOpenSSL", "pytest", "pybis==1.35.11", "click"], + install_requires=["pyOpenSSL", "pytest", "pybis==1.36.0", "click"], entry_points={"console_scripts": ["obis=obis.scripts.cli:main"]}, zip_safe=False, python_requires=">=3.3", -- GitLab