Commit 1bb99df8 authored by Adam Laskowski
SSDM-13456: obis search command now includes recursive search in samples/datasets. Refactored documentation. obis-0.4.2rc1
parent f4b301d5
Showing with 280 additions and 99 deletions
+# New in version 0.4.2
+* Added filtering by object in object and data_set search commands
+* Added recursive search to object and data_set search commands
+* Updated documentation regarding authentication
 # New in version 0.4.1
 * Fixed parameters for determine_hostname method in addref functionality
...
@@ -14,6 +14,6 @@
 #
 __author__ = "ID SIS • ETH Zürich"
 __email__ = "openbis-support@id.ethz.ch"
-__version__ = "0.4.1"
+__version__ = "0.4.2rc1"
 from .dm import *
@@ -154,7 +154,8 @@ class OpenbisCommand(object):
     def login(self):
         """ Restore session token if available. """
         if 'config' in self.config_dict.keys():
-            if 'openbis_token' in self.config_dict['config'].keys():
+            if 'openbis_token' in self.config_dict['config'].keys() and \
+                    self.config_dict['config']['openbis_token'] is not None:
                 self.openbis.set_token(self.config_dict['config']['openbis_token'], True)
         """ Checks for valid session and asks user for password
         if login is needed. """
@@ -225,7 +226,8 @@ class OpenbisCommand(object):
         # ask user
         hostname = self.ask_for_hostname(socket.gethostname())
         # store
-        self.data_mgmt.config('config', True, False, OperationType.SET, prop='hostname', value=hostname)
+        self.data_mgmt.config('config', True, False, OperationType.SET, prop='hostname',
+                              value=hostname)
         return hostname
 
     def ask_for_hostname(self, hostname):
...
@@ -19,43 +19,53 @@ from ..utils import cd
 from ...scripts.click_util import click_echo
 
+
+def _dfs(objects, prop, func):
+    """Helper function that performs a DFS over the children graph of objects"""
+    stack = [getattr(openbis_obj, prop) for openbis_obj in
+             objects]  # datasets and samples provide children in different formats
+    downloaded = {getattr(openbis_obj, prop): openbis_obj for openbis_obj in objects}
+    visited = set()
+    stack.reverse()
+    output = []
+    while stack:
+        key = stack.pop()
+        if key not in visited:
+            visited.add(key)
+            if key in downloaded:
+                obj = downloaded[key]
+            else:
+                obj = func(key)
+            output += [obj]
+            children = obj.children.copy()
+            children.reverse()
+            for child in children:
+                stack.append(child)
+    return output
+
 class Search(OpenbisCommand):
     """
-    Command to search data in openBIS.
+    Command to search samples or datasets in openBIS.
     """
 
-    def __init__(self, dm, filters, save_path):
+    def __init__(self, dm, filters, recursive, save_path):
         """
         :param dm: data management
         :param filters: Dictionary of filter to be used during search
+        :param recursive: Flag indicating recursive search in children
         :param save_path: Path to save results. If not set, results will not be saved.
         """
         self.filters = filters
+        self.recursive = recursive
         self.save_path = save_path
         self.load_global_config(dm)
+        self.props = "*"
+        self.attrs = ["parents", "children"]
         super(Search, self).__init__(dm)
     def search_samples(self):
-        properties = None
-        if self.filters['property_code'] is not None and self.filters['property_value'] is not None:
-            properties = {
-                self.filters['property_code']: self.filters['property_value'],
-            }
-        args = dict(space=self.filters['space'],
-                    project=self.filters['project'],  # Not Supported with Project Samples disabled
-                    experiment=self.filters['experiment'],
-                    type=self.filters['type_code'],
-                    where=properties,
-                    attrs=["parents", "children"],
-                    props="*")  # Fetch all properties
-        if self.filters['registration_date'] is not None:
-            args['registrationDate'] = self.filters['registration_date']
-        if self.filters['modification_date'] is not None:
-            args['modificationDate'] = self.filters['modification_date']
-        search_results = self.openbis.get_samples(**args)
+        search_results = self._search_samples()
         click_echo(f"Objects found: {len(search_results)}")
         if self.save_path is not None:
             click_echo(f"Saving search results in {self.save_path}")
@@ -66,37 +76,81 @@ class Search(OpenbisCommand):
         return CommandResult(returncode=0, output="Search completed.")
 
+    def _search_samples(self):
+        """Helper method to search samples"""
+        if "object_code" in self.filters:
+            results = self.openbis.get_samples(identifier=self.filters['object_code'],
+                                               attrs=self.attrs, props=self.props)
+        else:
+            args = self._get_filtering_args(self.props)
+            results = self.openbis.get_samples(**args)
+        if self.recursive:
+            output = _dfs(results.objects, 'identifier',
+                          self.openbis.get_sample)  # samples provide identifiers as children
+            search_results = self.openbis._sample_list_for_response(props=self.props,
+                                                                    response=[sample.data for sample
+                                                                              in output],
+                                                                    parsed=True)
+        else:
+            search_results = results
+        return search_results
     def search_data_sets(self):
         if self.save_path is not None and self.fileservice_url() is None:
             return CommandResult(returncode=-1,
                                  output="Configuration fileservice_url needs to be set for download.")
-        properties = None
+        if self.recursive:
+            search_results = self._search_samples()  # Look for samples recursively
+            o = []
+            for sample in search_results.objects:  # get datasets
+                o += sample.get_datasets(
+                    attrs=self.attrs, props=self.props)
+            output = _dfs(o, 'permId',  # datasets provide permIds as children
+                          self.openbis.get_dataset)  # look for child datasets of sample datasets
+            datasets = self.openbis._dataset_list_for_response(props=self.props,
+                                                               response=[dataset.data for dataset
+                                                                         in output],
+                                                               parsed=True)
+        else:
+            if "object_code" in self.filters:
+                results = self.openbis.get_sample(self.filters['object_code']).get_datasets(
+                    attrs=self.attrs, props=self.props)
+            else:
+                args = self._get_filtering_args(self.props)
+                results = self.openbis.get_datasets(**args)
+            datasets = results
+        click_echo(f"Data sets found: {len(datasets)}")
+        if self.save_path is not None:
+            click_echo(f"Saving search results in {self.save_path}")
+            with cd(self.data_mgmt.invocation_path):
+                datasets.df.to_csv(self.save_path, index=False)
+        else:
+            click_echo(f"Search results:\n{datasets}")
+        return CommandResult(returncode=0, output="Search completed.")
+
+    def _get_filtering_args(self, props):
+        where = None
         if self.filters['property_code'] is not None and self.filters['property_value'] is not None:
-            properties = {
+            where = {
                 self.filters['property_code']: self.filters['property_value'],
             }
         args = dict(space=self.filters['space'],
-                    project=self.filters['project'],  # Not Supported with Project Samples disabled
+                    project=self.filters['project'],
+                    # Not Supported with Project Samples disabled
                     experiment=self.filters['experiment'],
                     type=self.filters['type_code'],
-                    where=properties,
-                    attrs=["parents", "children"],
-                    props="*")  # Fetch all properties
+                    where=where,
+                    attrs=self.attrs,
+                    props=props)
         if self.filters['registration_date'] is not None:
             args['registrationDate'] = self.filters['registration_date']
         if self.filters['modification_date'] is not None:
             args['modificationDate'] = self.filters['modification_date']
+        return args
-        datasets = self.openbis.get_datasets(**args)
-        click_echo(f"Data sets found: {len(datasets)}")
-        if self.save_path is not None:
-            click_echo(f"Saving search results in {self.save_path}")
-            with cd(self.data_mgmt.invocation_path):
-                datasets.df.to_csv(self.save_path, index=False)
-        else:
-            click_echo(f"Search results:\n{datasets}")
-        return CommandResult(returncode=0, output="Search completed.")
@@ -71,7 +71,8 @@ def DataMgmt(echo_func=None, settings_resolver=None, openbis_config={}, git_conf
         repository_type = Type.LINK
     if repository_type == Type.PHYSICAL:
-        return PhysicalDataMgmt(settings_resolver, None, None, openbis, log, data_path,
+        complete_openbis_config(openbis_config, settings_resolver)
+        return PhysicalDataMgmt(settings_resolver, openbis_config, None, openbis, log, data_path,
                                 metadata_path, invocation_path)
     else:
         complete_git_config(git_config)
@@ -229,17 +230,19 @@ class AbstractDataMgmt(metaclass=abc.ABCMeta):
         return
 
     @abc.abstractmethod
-    def search_object(self, filters, save):
+    def search_object(self, filters, recursive, save):
         """Search for objects in openBIS using filtering criteria.
         :param filters: dictionary of filter parameters
+        :param recursive: Flag indicating if search should include children recursively
         :param save: File path to save results. If missing, search results will not be saved.
         """
         return
 
     @abc.abstractmethod
-    def search_data_set(self, filters, save):
+    def search_data_set(self, filters, recursive, save):
         """Search for datasets in openBIS using filtering criteria.
         :param filters: dictionary of filter parameters
+        :param recursive: Flag indicating if search should include children recursively
         :param save: File path to save results. If missing, search results will not be saved.
         """
         return
@@ -642,12 +645,12 @@ class PhysicalDataMgmt(AbstractDataMgmt):
         cmd = Upload(self, sample_id, data_set_type, files)
         return cmd.run()
 
-    def search_object(self,filters, save):
-        cmd = Search(self, filters, save)
+    def search_object(self, filters, recursive, save):
+        cmd = Search(self, filters, recursive, save)
         return cmd.search_samples()
 
-    def search_data_set(self, filters, save):
-        cmd = Search(self, filters, save)
+    def search_data_set(self, filters, recursive, save):
+        cmd = Search(self, filters, recursive, save)
         return cmd.search_data_sets()
 
     def config(self, category, is_global, is_data_set_property, operation_type, prop=None,
...
@@ -17,12 +17,14 @@ import shutil
 from .checksum import ChecksumGeneratorCrc32, ChecksumGeneratorGitAnnex
 from .utils import run_shell
+from ..scripts.click_util import click_echo
 
 
 class GitWrapper(object):
     """A wrapper on commands to git and git annex."""
 
-    def __init__(self, git_path=None, git_annex_path=None, find_git=None, data_path=None, metadata_path=None, invocation_path=None):
+    def __init__(self, git_path=None, git_annex_path=None, find_git=None, data_path=None,
+                 metadata_path=None, invocation_path=None):
         self.git_path = git_path
         self.git_annex_path = git_annex_path
         self.data_path = data_path
@@ -39,17 +41,20 @@ class GitWrapper(object):
         cmd += params
         return run_shell(cmd, strip_leading_whitespace=strip_leading_whitespace)
 
     def can_run(self):
         """Return true if the perquisites are satisfied to run (git and git annex)"""
         if self.git_path is None:
+            click_echo('No git path found!')
             return False
         if self.git_annex_path is None:
+            click_echo('No git-annex path found!')
             return False
         if self._git(['help']).failure():
+            click_echo('Can not run git!')
             # git help should have a returncode of 0
             return False
         if self._git(['annex', 'help']).failure():
+            click_echo('Can not run git-annex!')
             # git help should have a returncode of 0
             return False
         result = run_shell([self.git_path, 'annex', 'version'])
@@ -60,7 +65,7 @@ class GitWrapper(object):
         try:
             self.annex_major_version = int(self.annex_version.split(".")[0])
         except Exception as e:
-            print("Invalid git-annex version line:",result.output)
+            print("Invalid git-annex version line:", result.output)
             return False
         return True
@@ -199,9 +204,11 @@ class GitRepoFileInfo(object):
     def cksum(self, files, git_annex_hash_as_checksum=False):
         if git_annex_hash_as_checksum == False:
-            checksum_generator = ChecksumGeneratorCrc32(self.git_wrapper.data_path, self.git_wrapper.metadata_path)
+            checksum_generator = ChecksumGeneratorCrc32(self.git_wrapper.data_path,
+                                                        self.git_wrapper.metadata_path)
         else:
-            checksum_generator = ChecksumGeneratorGitAnnex(self.git_wrapper.data_path, self.git_wrapper.metadata_path)
+            checksum_generator = ChecksumGeneratorGitAnnex(self.git_wrapper.data_path,
+                                                           self.git_wrapper.metadata_path)
         checksums = []
...
@@ -44,12 +44,12 @@ def complete_openbis_config(config, resolver, local_only=True):
         config['verify_certificates'] = config_dict['verify_certificates']
     if config.get('token') is None:
         config['token'] = None
-    if config.get('is_physical') is None:
-        config['is_physical'] = None
+    if config.get('is_physical') is None and config_dict['is_physical'] is not None:
+        config['is_physical'] = config_dict['is_physical']
     if config.get(
             'allow_http_but_do_not_use_this_in_production_and_only_within_safe_networks') is None:
         config['allow_http_but_do_not_use_this_in_production_and_only_within_safe_networks'] = not \
             config_dict['allow_only_https']
 
 
 def complete_git_config(config):
...
@@ -31,7 +31,7 @@ data_files = [
 setup(
     name="obis",
-    version="0.4.1",
+    version="0.4.2rc1",
     description="Local data management with assistance from OpenBIS.",
     long_description=long_description,
     long_description_content_type="text/markdown",
...
@@ -22,9 +22,12 @@ case, OpenBIS is aware of its existence and the data can be used for provenance
    1. [Settings](#521-settings)
    2. [Commands](#522-commands)
    3. [Examples](#523-examples)
-6. [Big Data Link Services](#6-big-data-link-services)
-7. [Rationale for obis](#7-rationale-for-obis)
-8. [Literature](#8-literature)
+6. [Authentication](#6-authentication)
+   1. [Login](#61-login)
+   2. [Personal Access Token](#62-personal-access-token)
+7. [Big Data Link Services](#7-big-data-link-services)
+8. [Rationale for obis](#8-rationale-for-obis)
+9. [Literature](#9-literature)
 
 ## 1. Prerequisites
@@ -166,7 +169,7 @@ Here is a short summary of which commands are available in given modes:
 | settings clear | ❌ | ✅ |
 | status | ❌ | ✅ |
 | sync | ❌ | ✅ |
 | token | | ✅ |
 | upload | ✅ | ❌ |
 
 **Login**
@@ -234,6 +237,7 @@ Options:
   -space, --space TEXT            Space code
   -project, --project TEXT        Full project identification code
   -experiment, --experiment TEXT  Full experiment code
+  -object, --object TEXT          Object identification information, it can be permId or identifier
   -type, --type TEXT              Type code
   -registration-date, --registration-date TEXT
                                   Registration date, it can be in the format
@@ -244,12 +248,16 @@ Options:
   -property TEXT                  Property code
   -property-value TEXT            Property value
   -save, --save TEXT              Directory name to save results
+  -r, --recursive                 Search data recursively
 ```
 With `data_set search` command, obis connects to a configured OpenBIS instance and searches for all
-data sets that fulfill given filtering criteria.
-At least one filtering criteria must be specified. Search results can be downloaded by
-using `save` option.
+data sets that fulfill the given filtering criteria or match a given object identifier.
+At least one search option must be specified.
+Search results can be downloaded into a file by using the `-save` option.
+The `-r, --recursive` option also includes data sets of child samples and child data sets in the search.
 
 *Note: Filtering by `-project` may not work when `Project Samples` are disabled in OpenBIS
 configuration.*
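As an illustration of the options documented above (not taken from this commit; the space code `MY_SPACE`, type code `RAW_DATA`, and file name `results.csv` are placeholders), a recursive data set search saved to a file might look like:

```
obis data_set search -space MY_SPACE -type RAW_DATA -r -save results.csv
```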
@@ -306,6 +314,7 @@ Options:
   -space, --space TEXT            Space code
   -project, --project TEXT        Full project identification code
   -experiment, --experiment TEXT  Full experiment
+  -object, --object TEXT          Object identification information, it can be permId or identifier
   -registration-date, --registration-date TEXT
                                   Registration date, it can be in the format
                                   "oYYYY-MM-DD" (e.g. ">2023-01-31", "=2023-01-31", "<2023-01-31")
@@ -315,12 +324,16 @@ Options:
   -property TEXT                  Property code
   -property-value TEXT            Property value
   -save, --save TEXT              File name to save results in csv format
+  -r, --recursive                 Search data recursively
 ```
 With `object search` command, obis connects to a configured OpenBIS instance and searches for all
-objects/samples that fulfill given filtering criteria.
-At least one filtering criteria must be specified. Search results can be downloaded int a file by
-using `-save` option.
+objects/samples that fulfill the given filtering criteria or match a given object identifier.
+At least one search option must be specified.
+Search results can be downloaded into a file by using the `-save` option.
+The `-r, --recursive` option extends the search to child samples, recursively.
 
 *Note: Filtering by `-project` may not work when `Project Samples` are disabled in OpenBIS
 configuration.*
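For example, assuming a sample identifier `/MY_SPACE/SAMPLE_1` (a placeholder, not a value from this commit), a recursive object search by identifier saved to a CSV file might look like:

```
obis object search -object /MY_SPACE/SAMPLE_1 -r -save samples.csv
```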
@@ -550,6 +563,7 @@ was moved or copied without using the `move` or `copy` commands.
 **token**
 
 ```
+obis token get <session_name> [--validity-days] [--validity-weeks] [--validity-months]
 ```
@@ -602,7 +616,24 @@ echo content >> example_file
 obis commit -m 'message'
 ```
-## 6. Big Data Link Services
+## 6. Authentication
+
+There are two ways to perform user authentication against OpenBIS.
+
+### 6.1. Login
+
+Internally, obis stores a session token that is used to connect to OpenBIS. Whenever this token
+is invalidated, obis will ask the user to provide credentials to log into OpenBIS again.
+
+### 6.2. Personal Access Token
+
+A session token is short-lived, and its interactive generation makes it impractical for use in
+automated scripts. An alternative is to generate a personal access token (PAT), which can be
+configured to last for long periods of time.
+
+PAT generation is explained in depth in the `token` command section.
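As a sketch of how a PAT could be created with the `token` command documented earlier (the session name `automation_session` and the validity period are placeholders; the exact argument format of the validity flags follows the `token` command help):

```
obis token get automation_session --validity-weeks 4
```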
+
+## 7. Big Data Link Services
 
 The Big Data Link Services can be used to download files which are contained in an obis repository.
 The services are included in the installation folder of openBIS,
@@ -610,7 +641,7 @@ under `servers/big_data_link_services`. For how to configure and run them, consu
 the [README.md](https://sissource.ethz.ch/sispub/openbis/blob/master/big_data_link_server/README.md)
 file.
 
-## 7. Rationale for obis
+## 8. Rationale for obis
 
 Data-provenance tracking tools like openBIS make it possible to understand and follow the research
 process. What was studied, what data was acquired and how, how was data analyzed to arrive at final
@@ -639,7 +670,7 @@ Using `git-annex`, even large binary artifacts can be tracked efficiently. For c
 openBIS, `obis` uses the openBIS API, which offers the power to register and track all metadata
 supported by openBIS.
 
-## 8. Literature
+## 9. Literature
 
 V. Korolev, A. Joshi, V. Korolev, M.A. Grasso, A. Joshi, M.A. Grasso, et al., "PROB: A tool for
 tracking provenance and reproducibility of big data experiments", Reproduce '14. HPCA 2014, vol. 11,
...