From 0359203c2f091df3e2b08d52b29c2e504b1c6c51 Mon Sep 17 00:00:00 2001
From: alaskowski <alaskowski@ethz.ch>
Date: Mon, 24 Jul 2023 16:35:08 +0200
Subject: [PATCH] SSDM-13689: fixes to object-search, performance improvements,
 OBIS 0.4.2rc7

---
 .../src/python/CHANGELOG.md                   |  2 +-
 .../src/python/obis/__init__.py               |  2 +-
 .../src/python/obis/dm/commands/search.py     | 98 ++++++++++++-------
 .../src/python/obis/scripts/cli.py            |  2 +-
 app-openbis-command-line/src/python/setup.py  |  4 +-
 5 files changed, 68 insertions(+), 40 deletions(-)

diff --git a/app-openbis-command-line/src/python/CHANGELOG.md b/app-openbis-command-line/src/python/CHANGELOG.md
index 461d6c1c0ac..fd5eb265bfa 100644
--- a/app-openbis-command-line/src/python/CHANGELOG.md
+++ b/app-openbis-command-line/src/python/CHANGELOG.md
@@ -4,7 +4,7 @@
 * Added recursive search to object and data_set search commands
 * Updated documentation regarding authentication
 * Added dataset ids to sample search results
-* changed pybis dependency to version == 1.35.11
+* Changed pybis dependency to version == 1.36.0
 
 # New in version 0.4.1
 
diff --git a/app-openbis-command-line/src/python/obis/__init__.py b/app-openbis-command-line/src/python/obis/__init__.py
index bba01c84213..6d545ea634c 100644
--- a/app-openbis-command-line/src/python/obis/__init__.py
+++ b/app-openbis-command-line/src/python/obis/__init__.py
@@ -14,6 +14,6 @@
 #
 __author__ = "ID SIS • ETH Zürich"
 __email__ = "openbis-support@id.ethz.ch"
-__version__ = "0.4.2rc6"
+__version__ = "0.4.2rc7"
 
 from .dm import *
diff --git a/app-openbis-command-line/src/python/obis/dm/commands/search.py b/app-openbis-command-line/src/python/obis/dm/commands/search.py
index 9c500c8df46..3198164bd16 100644
--- a/app-openbis-command-line/src/python/obis/dm/commands/search.py
+++ b/app-openbis-command-line/src/python/obis/dm/commands/search.py
@@ -18,7 +18,6 @@ import concurrent.futures
 import pandas as pd
 
 from pybis.property_reformatter import is_of_openbis_supported_date_format
-from pybis.sample import Sample
 from .openbis_command import OpenbisCommand
 from ..command_result import CommandResult
 from ..utils import cd
@@ -27,11 +26,11 @@ from ...scripts.click_util import click_echo
 
 def _dfs(objects, prop, func, func_specific):
     """Helper function that perform DFS search over children graph of objects"""
+    # TODO: improve performance of this - make it similar to _dfs_samples
     with concurrent.futures.ThreadPoolExecutor(
             max_workers=5) as pool_simple, concurrent.futures.ThreadPoolExecutor(
         max_workers=20) as pool_full:
-        stack = [getattr(openbis_obj, prop) for openbis_obj in
-                 objects]  # datasets and samples provide children in different formats
+        stack = [openbis_obj[prop] for openbis_obj in objects]  # datasets and samples provide children in different formats
         visited = set()
         stack.reverse()
         output = []
@@ -55,12 +54,26 @@ def _dfs(objects, prop, func, func_specific):
     return output
 
 
-def _get_datasets_of_samples(get_dataset_method, samples):
-    output = []
-    with concurrent.futures.ThreadPoolExecutor(
-            max_workers=5) as pool_simple:
-        output = pool_simple.map(get_dataset_method, samples)
-
+def _dfs_samples(data_base, prop, func):
+    """Collect data_base plus every transitive child sample, one level at a time.
+
+    data_base: list of raw sample records with record[prop][prop] and 'children'.
+    func: called with a list of ids; must return a dict whose values are records.
+    Returns data_base itself, extended in place; each id is fetched at most once.
+    """
+    output = data_base  # alias on purpose: the caller's list is extended in place
+    visited = {x[prop][prop] for x in data_base}
+    children = [x for obj in data_base for x in obj['children'] or []]
+    while children:
+        # first-seen order, minus repeats and ids already collected (e.g. search hits)
+        ids = [key for key in dict.fromkeys(x[prop][prop] for x in children)
+               if key not in visited]
+        if not ids:
+            break
+        visited.update(ids)
+        data = list(func(ids).values())
+        output += data
+        children = [x for obj in data for x in obj['children'] or []]
     return output
 
 
@@ -145,7 +158,13 @@ class Search(OpenbisCommand):
         super(Search, self).__init__(dm)
 
     def search_samples(self):
-        search_results = self._search_samples()
+        search_results = self._search_samples(raw_response=True)
+
+        search_results = self.openbis._sample_list_for_response(props=self.props,
+                                                                response=search_results,
+                                                                attrs=["parents", "children",
+                                                                       "dataSets"],
+                                                                parsed=True)
 
         click_echo(f"Objects found: {len(search_results)}")
         if self.save_path is not None:
@@ -163,30 +182,35 @@ class Search(OpenbisCommand):
     def _get_sample_with_datasets(self, identifier):
         return self.openbis.get_sample(identifier, withDataSetIds=True)
 
-    def _search_samples(self):
+    def _get_sample_with_datasets2(self, identifier):  # _dfs_samples calls this with a list of ids
+        return self.openbis.get_sample(identifier, withDataSetIds=True, raw_response=True)
+
+    def _search_samples(self, raw_response=False):
         """Helper method to search samples"""
 
+        if self.recursive:
+            raw_response = True
+
         if "object_code" in self.filters:
             results = self.openbis.get_samples(identifier=self.filters['object_code'],
                                                attrs=["parents", "children", "dataSets"],
+                                               raw_response=raw_response,
                                                props=self.props)
         else:
             args = self._get_filtering_args(self.props, ["parents", "children", "dataSets"])
+            args["raw_response"] = raw_response
             results = self.openbis.get_samples(**args)
 
         if self.recursive:
             click_echo(f"Recursive search enabled. It may take time to produce results.")
-            output = _dfs(results.objects, 'identifier',
-                          self._get_samples_children,
-                          self._get_sample_with_datasets)  # samples provide identifiers as children
-            search_results = self.openbis._sample_list_for_response(props=self.props,
-                                                                    response=[sample.data for sample
-                                                                              in output],
-                                                                    attrs=["parents", "children",
-                                                                           "dataSets"],
-                                                                    parsed=True)
+            output2 = _dfs_samples(results['objects'], 'identifier', self._get_sample_with_datasets2)
+
+            search_results = output2
         else:
-            search_results = results
+            if raw_response:
+                search_results = results['objects']
+            else:
+                search_results = results
         return search_results
 
     def _get_datasets_children(self, permId):
@@ -203,22 +227,26 @@ class Search(OpenbisCommand):
         dataset_filters = {k: v for (k, v) in main_filters.items() if not k.startswith('object_')}
         if object_filters:
             if 'id' in object_filters:
-                object_filters['object_code'] = object_filters['id']
+                if object_filters['id'] is not None:
+                    object_filters['object_code'] = object_filters['id']
                 del object_filters['id']
             self.filters = object_filters
-            search_results = self._search_samples()
-            datasets = [x for x in _get_datasets_of_samples(Sample.get_datasets, search_results) if
-                        x.totalCount > 0]
-            for thing in datasets:
-                for obj in thing.objects:
-                    if not _filter_dataset(obj, dataset_filters):
-                        for i in range(len(thing.response)):
-                            if thing.response[i]['permId']['permId'] == obj.permId:
-                                del thing.response[i]
-                                break
-            datasets = [x.response for x in datasets]
+            search_results = self._search_samples(raw_response=True)
+            click_echo(f"Samples found: {len(search_results)}")
+
+            datasets = [x["dataSets"] for x in search_results]
+            datasets = flatten(datasets)
+            datasets = [x['permId']['permId'] for x in datasets]
+            datasets = self.openbis.get_dataset(permIds=datasets)
+
+            filtered_datasets = []
+            for dataset in datasets:
+                if _filter_dataset(dataset, dataset_filters):
+                    filtered_datasets += [dataset]
+
             datasets = self.openbis._dataset_list_for_response(props=self.props,
-                                                               response=flatten(datasets),
+                                                               response=[x.data for x in
+                                                                         filtered_datasets],
                                                                parsed=True)
         else:
             if self.recursive:
@@ -264,7 +292,7 @@ class Search(OpenbisCommand):
         args = dict(space=self.filters['space'],
                     project=self.filters['project'],
                     # Not Supported with Project Samples disabled
-                    experiment=self.filters['experiment'],
+                    experiment=self.filters['collection'],
                     type=self.filters['type_code'],
                     where=where,
                     attrs=attrs,
diff --git a/app-openbis-command-line/src/python/obis/scripts/cli.py b/app-openbis-command-line/src/python/obis/scripts/cli.py
index 7cd7e41be8f..efd84086e5e 100644
--- a/app-openbis-command-line/src/python/obis/scripts/cli.py
+++ b/app-openbis-command-line/src/python/obis/scripts/cli.py
@@ -485,7 +485,7 @@ def object_search(ctx, type_code, space, project, collection, registration_date,
         filters = dict(object_code=object_id)
     else:
         filters = dict(type_code=type_code, space=space,
-                       project=project, experiment=collection, property_code=property_code,
+                       project=project, collection=collection, property_code=property_code,
                        registration_date=registration_date, modification_date=modification_date,
                        property_value=property_value)
     return ctx.obj['runner'].run("object_search",
diff --git a/app-openbis-command-line/src/python/setup.py b/app-openbis-command-line/src/python/setup.py
index 267cea437ff..993559aaf63 100644
--- a/app-openbis-command-line/src/python/setup.py
+++ b/app-openbis-command-line/src/python/setup.py
@@ -31,7 +31,7 @@ data_files = [
 
 setup(
     name="obis",
-    version="0.4.2rc6",
+    version="0.4.2rc7",
     description="Local data management with assistance from OpenBIS.",
     long_description=long_description,
     long_description_content_type="text/markdown",
@@ -42,7 +42,7 @@ setup(
     packages=["obis", "obis.dm", "obis.dm.commands", "obis.scripts"],
     data_files=data_files,
     package_data={"obis": ["dm/git-annex-attributes"]},
-    install_requires=["pyOpenSSL", "pytest", "pybis==1.35.11", "click"],
+    install_requires=["pyOpenSSL", "pytest", "pybis==1.36.0", "click"],
     entry_points={"console_scripts": ["obis=obis.scripts.cli:main"]},
     zip_safe=False,
     python_requires=">=3.3",
-- 
GitLab