diff --git a/app-openbis-command-line/src/python/README.md b/app-openbis-command-line/src/python/README.md index 4289ef5d967298f97f95b32d1ed2be63896cc638..f5c8dcd1875362ce7ba969aeac8124b667cb55cb 100644 --- a/app-openbis-command-line/src/python/README.md +++ b/app-openbis-command-line/src/python/README.md @@ -1,14 +1,37 @@ # oBIS -oBIS is a command-line tool to handle dataSets that are too big to store in openBIS but still need to be registered and tracked in openBIS. - -## Prerequisites +oBIS is a command-line tool that makes it possible to handle data sets tracked by openBIS, +where users have complete freedom to structure and manipulate the data as they wish, while retaining +the benefits of openBIS. + +With oBIS, it is possible not only to handle data sets stored in openBIS but also to keep only the +metadata in openBIS, while the data itself is managed externally, by the user. In this +case, openBIS is aware of its existence and the data can be used for provenance tracking. + +# Table of contents + +1. [Prerequisites](#1-prerequisites) +2. [Installation](#2-installation) +3. [Usage](#3-usage) +4. [Work modes](#4-work-modes) + 1. [Standard Data Store](#41-standard-data-store) + 1. [Commands](#411-commands) + 2. [Examples](#412-examples) + 2. [External Data Store](#42-external-data-store) + 1. [Settings](#421-settings) + 2. [Commands](#422-commands) + 3. [Examples](#423-examples) +5. [Big Data Link Services](#5-big-data-link-services) +6. [Rationale for obis](#6-rationale-for-obis) +7. [Literature](#7-literature) + +## 1. Prerequisites - python 3.6 or higher - git 2.11 or higher - git-annex 6 or higher [Installation guide](https://git-annex.branchable.com/install/) -## Installation +## 2. Installation ``` pip3 install obis @@ -16,12 +39,13 @@ pip3 install obis Since `obis` is based on `pybis`, the pip command will also install pybis and all its dependencies. -## Usage +## 3. Usage -### Help is your friend! +### 3.1 Help is your friend! 
-``` $ obis --help + +``` Usage: obis [OPTIONS] COMMAND [ARGS]... Options: @@ -38,7 +62,7 @@ Commands: commit Commit the repository to git and inform openBIS. config Get/set configurations. data_set Get/set settings related to the data set. - download Download files of a linked data set. + download Download files of a data set. init Initialize the folder as a data repository. init_analysis Initialize the folder as an analysis folder. move Move the repository found in the given data set id. @@ -49,6 +73,7 @@ Commands: status Show the state of the obis repository. sync Sync the repository with openBIS. token create/show a openBIS token + upload Upload files to form a data set. ``` To show detailed help for a specific command, type `obis <command> --help` : @@ -64,9 +89,245 @@ Options: --help Show this message and exit. ``` -## Settings +## 4. Work modes + +The oBIS command line tool can work in two modes depending on how data is stored: + +1. Standard Data Store mode +2. External Data Store mode + +**Warning:** Each repository can work in a single mode only! Mixing modes is not supported. + +Depending on the mode, some commands may be unavailable or behave differently. Please read the details +in the relevant section. 
+ +Here is a short summary of which commands are available in given modes: + +| Command | Standard Data Store | External Data Store | +|------------------|:-------------------:|:-------------------:| +| addref | ❌ | ✅ | +| clone | ❌ | ✅ | +| collection get | ✅ | ✅ | +| collection set | ✅ | ✅ | +| collection clear | ❌ | ✅ | +| commit | ❌ | ✅ | +| config get | ✅ | ✅ | +| config set | ✅ | ✅ | +| config clear | ❌ | ✅ | +| data_set get | ❌ | ✅ | +| data_set set | ❌ | ✅ | +| data_set clear | ❌ | ✅ | +| data_set search | ✅ | ❌ | +| download | ✅ | ❌ | +| init | ❌ | ✅ | +| init -p | ✅ | ❌ | +| init_analysis | ❌ | ✅ | +| move | ❌ | ✅ | +| object get | ✅ | ✅ | +| object set | ✅ | ✅ | +| object clear | ❌ | ✅ | +| object search | ✅ | ❌ | +| removeref | ❌ | ✅ | +| repository get | ❌ | ✅ | +| repository set | ❌ | ✅ | +| repository clear | ❌ | ✅ | +| settings get | ❌ | ✅ | +| settings set | ❌ | ✅ | +| settings clear | ❌ | ✅ | +| status | ❌ | ✅ | +| sync | ❌ | ✅ | +| token | ❌ | ✅ | +| upload | ✅ | ❌ | + +## 4.1 Standard Data Store + +Standard Data Store mode depicts a workflow where datasets are stored directly in the OpenBIS +instance. In this mode the user can download/upload files to OpenBIS, search for objects/datasets +fulfilling filtering criteria +and get/set properties of objects/collections represented by datasets in the current repository. + +### 4.1.1 Commands + +**collection** + +``` +obis collection get [key1] [key2] ... +obis collection set [key1]=[value1], [key2]=[value2] ... +``` + +With the `collection` command, obis crawls through the current repository and gathers all data set ids and +then - if +a data set is connected directly to a collection - gets or sets the given properties on it in OpenBIS. + +*Note: some property names may need to be enclosed in single quotes, e.g. '$name'* + +**config** + +``` +obis config get [key] +obis config set [key]=[value] +``` + +With `config` command, obis can get/set config of a local repository, e.g. 
when setting access link +to OpenBIS instance + +The settings are saved within the obis repository, in the `.obis` folder, as JSON files, or +in `~/.obis` for the global settings. They can be added/edited manually, which might be useful when +it comes to integration with other tools. + +**Example `.obis/config.json`** + +``` +{ + "fileservice_url": null, + "git_annex_hash_as_checksum": true, + "hostname": "bsse-bs-dock-5-160.ethz.ch", + "is_physical": true, + "openbis_url": "http://localhost:8888" +} +``` + +**data_set** + +``` +obis data_set search [OPTIONS] + +Options: + -type, --type TEXT Type code to filter by + -space, --space TEXT Space code + -project, --project TEXT Full project identification code + -experiment, --experiment TEXT Full experiment code + -property TEXT Property code + -property-value TEXT Property value + -save, --save TEXT Directory name to save results +``` + +With `data_set search` command, obis connects to a configured OpenBIS instance and searches for all +data sets that fulfill given filtering criteria. +At least one filtering criterion must be specified. Resulting data set files can be downloaded by +using the `-save` option. + +*Note: Filtering by `-project` may not work when `Project Samples` are disabled in OpenBIS +configuration.* + +**download** + +``` +obis download [options] [data_set_id] +``` + +The `download` command downloads the files of a given data set from the OpenBIS instance specified +in `config`. This command requires the DownloadHandler / FileInfoHandler microservices to be running +and the `fileservice_url` needs to be configured. -With `get` you retrieve one or more settings. If the `key` is omitted, you retrieve all settings of the `type`: +**init** + +``` +obis init -p [folder] +``` + +If a folder is given, obis will initialize that folder as an obis repository that works in the +Standard Data Store mode. +If not, it will use the current folder. + +**object get / set** + +``` +obis object get [key1] [key2] ... 
+obis object set [key1]=[value1], [key2]=[value2] ... +``` + +With `get` and `set` commands, obis crawls through the current repository and gathers all data set ids +and then - if +a data set is connected directly to an object - gets or sets the given properties on it in OpenBIS. + +*Note: some property names may need to be enclosed in single quotes, e.g. '$name'* + +**object search** + +``` +obis object search [OPTIONS] + +Options: + -type, --type TEXT Type code to filter by + -space, --space TEXT Space code + -project, --project TEXT Full project identification code + -experiment, --experiment TEXT Full experiment code + -property TEXT Property code + -property-value TEXT Property value + -save, --save TEXT File name to save results in csv format +``` + +With `object search` command, obis connects to a configured OpenBIS instance and searches for all +objects/samples that fulfill given filtering criteria. +At least one filtering criterion must be specified. Search results can be downloaded into a file by +using the `-save` option. + +*Note: Filtering by `-project` may not work when `Project Samples` are disabled in OpenBIS +configuration.* + +**upload** + +``` +obis upload [sample_id] [data_set_type] [OPTIONS] +``` + +With `upload` command, a new data set of type `data_set_type` will be created under +object `sample_id`. Files and folders specified with `-f` flag will be uploaded to a newly created +data set. 
+ +### 4.1.2 Examples + +**Create an obis repository to work in Standard Data Store mode** + +``` +# global settings to be used for all obis repositories +obis config -g set openbis_url=https://localhost:8888 +obis config -g set user=admin +# create an obis repository with a folder name +obis init -p data1 +cd data1 +# check configuration +obis config get is_physical +# search for objects of type BACTERIA in space TESTID in OpenBIS +obis object search -space TESTID -type BACTERIA +# save search results in files +obis object search -space TESTID -type BACTERIA -save results.csv +obis object search -space TESTID -save results_space.csv +# upload files to an existing object as type RAW_DATA +obis upload 20230228133001314-59 RAW_DATA -f results.csv -f results_space.csv +``` + +**Download datasets of an object and check properties** + +``` +# assuming we are in a configured obis repository +obis download 20230228091119011-58 +# set object name to XYZ +obis object set '$name'=XYZ +# set children of an object to /TESTID/PROJECT_101/PROJECT_101_EXP_3 +obis object set children=/TESTID/PROJECT_101/PROJECT_101_EXP_3 +``` + +## 4.2 External Data Store + +External Data Store mode allows for orderly management of data in +conditions that require great flexibility. oBIS makes it possible to track data on a file system, +where users have complete freedom to structure and manipulate the data as they wish, while retaining +the benefits of openBIS. With oBIS, only metadata is actually stored and managed by openBIS. The +data itself is managed externally, by the user, but openBIS is aware of its existence and the data +can be used for provenance tracking. + +Under the covers, obis takes advantage of publicly available and tested tools to manage data on the +file system. In particular, it uses git and git-annex to track the content of a dataset. Using +git-annex, even large binary artifacts can be tracked efficiently. 
For communication with openBIS, +obis uses the openBIS API, which offers the power to register and track all metadata supported by +openBIS. + +### 4.2.1 Settings + +With `get` you retrieve one or more settings. If the `key` is omitted, you retrieve all settings of +the `type`: ``` obis [type] [options] get [key] @@ -90,12 +351,13 @@ With the type `settings` you can get all settings at once: obis settings [options] get ``` -The option `-g` can be used to interact with the global settings. The global settings are stored in `~/.obis` and are copied to an obis repository when that is created. +The option `-g` can be used to interact with the global settings. The global settings are stored +in `~/.obis` and are copied to an obis repository when that is created. Following settings exist: | type | setting | description | -| ---------- | ---------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +|------------|------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | collection |  `id` | Identifier of the collection the created data set is attached to. Use either this or the object id. | | config | `allow_only_https` | Default is true. If false, http can be used to connect to openBIS. | | config | `fileservice_url` | URL for downloading files. See DownloadHandler / FileInfoHandler services. 
| @@ -104,7 +366,7 @@ Following settings exist: | config | `hostname` | Hostname to be used when cloning / moving a data set to connect to the machine where the original copy is located. | | config | `openbis_url` | URL for connecting to openBIS (only protocol://host:port, without a path). | | config | `openbis_token` | Token to use when connecting to openBIS. Can be either a session token or a personal access token. Alternatively, it can be a path to a file containing the token. | -| config | `session_name` | The name every personal access token is associated with. | +| config | `session_name` | The name every personal access token is associated with. | | config | `obis_metadata_folder` | Absolute path to the folder which obis will use to store its metadata. If not set, the metadata will be stored in the same location as the data. This setting can be useful when dealing with read-only access to the data. The clone and move commands will not work when this is set. | | config | `user` | User for connecting to openBIS. | | data_set | `type` | Data set type of data sets created by obis. | @@ -114,7 +376,9 @@ Following settings exist: | repository | `external_dms_id` | This is set by obis. Id of the external dms in openBIS. | | repository | `id` | This is set by obis. Id of the obis repository. | -The settings are saved within the obis repository, in the `.obis` folder, as JSON files, or in `~/.obis` for the global settings. They can be added/edited manually, which might be useful when it comes to integration with other tools. +The settings are saved within the obis repository, in the `.obis` folder, as JSON files, or +in `~/.obis` for the global settings. They can be added/edited manually, which might be useful when +it comes to integration with other tools. 
**Example `.obis/config.json`** @@ -139,7 +403,7 @@ The settings are saved within the obis repository, in the `.obis` folder, as JSO } ``` -## Commands +### 4.2.2 Commands **init** @@ -147,7 +411,8 @@ The settings are saved within the obis repository, in the `.obis` folder, as JSO obis init [folder] ``` -If a folder is given, obis will initialize that folder as an obis repository. If not, it will use the current folder. +If a folder is given, obis will initialize that folder as an obis repository in the External Data Store mode. +If not, it will use the current folder. **init_analysis** @@ -155,7 +420,9 @@ If a folder is given, obis will initialize that folder as an obis repository. If obis init_analysis [options] [folder] ``` -With init_analysis, a repository can be created which is derived from a parent repository. If it is called from within a repository, that will be used as a parent. If not, the parent has to be given with the `-p` option. +With init_analysis, a repository can be created which is derived from a parent repository. If it is +called from within a repository, that will be used as a parent. If not, the parent has to be given +with the `-p` option. **commit** @@ -163,7 +430,8 @@ With init_analysis, a repository can be created which is derived from a parent r obis commit [options] ``` -The `commit` command adds files to a new data set in openBIS. If the `-m` option is not used to define a commit message, the user will be asked to provide one. +The `commit` command adds files to a new data set in openBIS. If the `-m` option is not used to +define a commit message, the user will be asked to provide one. **sync** @@ -171,7 +439,9 @@ The `commit` command adds files to a new data set in openBIS. If the `-m` option obis sync ``` -When git commits have been done manually, the `sync` command creates the corresponding data set in openBIS. Note that, when interacting with git directly, use the git annex commands whenever applicable, e.g. 
use "git annex add" instead of "git add". +When git commits have been done manually, the `sync` command creates the corresponding data set in +openBIS. Note that, when interacting with git directly, use the git annex commands whenever +applicable, e.g. use "git annex add" instead of "git add". **status** @@ -179,7 +449,8 @@ When git commits have been done manually, the `sync` command creates the corresp obis status [folder] ``` -This shows the status of the repository folder from which it is invoked, or the one given as a parameter. It shows file changes and whether the repository needs to be synchronized with openBIS. +This shows the status of the repository folder from which it is invoked, or the one given as a +parameter. It shows file changes and whether the repository needs to be synchronized with openBIS. **clone** @@ -187,11 +458,14 @@ This shows the status of the repository folder from which it is invoked, or the obis clone [options] [data_set_id] ``` -The `clone` command copies a repository associated with a data set and registers the new copy in openBIS. In case there are already multiple copied of the repository, obis will ask from which copy to clone. +The `clone` command copies a repository associated with a data set and registers the new copy in +openBIS. In case there are already multiple copies of the repository, obis will ask from which copy +to clone. - To avoid user interaction, the copy index can be chosen with the option `-c` - With the option `-u` a user can be defined for copying the files from a remote system -- By default, the file integrity is checked by calculating the checksum. This can be skipped with `-s`. +- By default, the file integrity is checked by calculating the checksum. This can be skipped + with `-s`. _Note_: This command does not work when `obis_metadata_folder` is set. 
@@ -205,14 +479,6 @@ The `move` command works the same as `clone`, except that the old repository wil Note: This command does not work when `obis_metadata_folder` is set. -**download** - -``` -obis download [options] [data_set_id] -``` - -The `download` command downloads the files of a data set. Contrary to `clone`, this will not register another copy in openBIS. It is only for accessing files. This command requires the DownloadHandler / FileInfoHandler microservices to be running and the `fileservice_url` needs to be configured. - **addref / removeref** ``` @@ -220,7 +486,8 @@ obis addref obis removeref ``` -Obis repository folders can be added or removed from openBIS. This can be useful when a repository was moved or copied without using the `move` or `copy` commands. +Obis repository folders can be added or removed from openBIS. This can be useful when a repository +was moved or copied without using the `move` or `copy` commands. **token** @@ -228,11 +495,14 @@ Obis repository folders can be added or removed from openBIS. This can be useful obis token get <session_name> [--validity-days] [--validity-weeks] [--validity-months] ``` -Gets or creates a new personal access token (PAT) and stores it in the obis configuration. If no `session_name` is provided or is not stored in the configuration, you'll be asked interactively. If no validity period is provided, the maximum (configured by the server) is used. If a PAT with this `session_name` already exists and it is going to expire soon (according to server setting `personal_access_tokens_validity_warning_period`), a new PAT will be created, stored in the obis configuration and used for every subsequent request. - +Gets or creates a new personal access token (PAT) and stores it in the obis configuration. If +no `session_name` is provided or is not stored in the configuration, you'll be asked interactively. +If no validity period is provided, the maximum (configured by the server) is used. 
If a PAT with +this `session_name` already exists and it is going to expire soon (according to server +setting `personal_access_tokens_validity_warning_period`), a new PAT will be created, stored in the +obis configuration and used for every subsequent request. - -## Examples +### 4.2.3 Examples **Create an obis repository and commit to openBIS** @@ -273,21 +543,46 @@ echo content >> example_file obis commit -m 'message' ``` -## Big Data Link Services - -The Big Data Link Services can be used to download files which are contained in an obis repository. The services are included in the installation folder of openBIS, under `servers/big_data_link_services`. For how to configure and run them, consult the [README.md](https://sissource.ethz.ch/sispub/openbis/blob/master/big_data_link_server/README.md) file. - -## Rationale for obis - -Data-provenance tracking tools like openBIS make it possible to understand and follow the research process. What was studied, what data was acquired and how, how was data analyzed to arrive at final results for publication -- this is information that is captured in openBIS. In the standard usage scenario, openBIS stores and manages data directly. This has the advantage that openBIS acts as a gatekeeper to the data, making it easy to keep backups or enforce access restrictions, etc. However, this way of working is not a good solution for all situations. - -Some research groups work with large amounts of data (e.g., multiple TB), which makes it inefficient and impractical to give openBIS control of the data. Other research groups require that data be stored on a shared file system under a well-defined directory structure, be it for historical reasons or because of the tools they use. In this case as well, it is difficult to give openBIS full control of the data. - -For situations like these, we have developed `obis`, a tool for orderly management of data in conditions that require great flexibility. 
`obis` makes it possible to track data on a file system, where users have complete freedom to structure and manipulate the data as they wish, while retaining the benefits of openBIS. With `obis`, only metadata is actually stored and managed by openBIS. The data itself is managed externally, by the user, but openBIS is aware of its existence and the data can be used for provenance tracking. `obis` is packaged as a stand-alone utility, which, to be available, only needs to be added to the `PATH` variable in a UNIX or UNIX-like environment. - -Under the covers, `obis` takes advantage of publicly available and tested tools to manage data on the file system. In particular, it uses `git` and `git-annex` to track the content of a dataset. Using `git-annex`, even large binary artifacts can be tracked efficiently. For communication with openBIS, `obis` uses the openBIS API, which offers the power to register and track all metadata supported by openBIS. - -## Literature - -V. Korolev, A. Joshi, V. Korolev, M.A. Grasso, A. Joshi, M.A. Grasso, et al., "PROB: A tool for tracking provenance and reproducibility of big data experiments", Reproduce '14. HPCA 2014, vol. 11, pp. 264-286, 2014. +## 5. Big Data Link Services + +The Big Data Link Services can be used to download files which are contained in an obis repository. +The services are included in the installation folder of openBIS, +under `servers/big_data_link_services`. For how to configure and run them, consult +the [README.md](https://sissource.ethz.ch/sispub/openbis/blob/master/big_data_link_server/README.md) +file. + +## 6. Rationale for obis + +Data-provenance tracking tools like openBIS make it possible to understand and follow the research +process. What was studied, what data was acquired and how, how was data analyzed to arrive at final +results for publication -- this is information that is captured in openBIS. In the standard usage +scenario, openBIS stores and manages data directly. 
This has the advantage that openBIS acts as a +gatekeeper to the data, making it easy to keep backups or enforce access restrictions, etc. However, +this way of working is not a good solution for all situations. + +Some research groups work with large amounts of data (e.g., multiple TB), which makes it inefficient +and impractical to give openBIS control of the data. Other research groups require that data be +stored on a shared file system under a well-defined directory structure, be it for historical +reasons or because of the tools they use. In this case as well, it is difficult to give openBIS full +control of the data. + +For situations like these, we have developed `obis`, a tool for orderly management of data in +conditions that require great flexibility. `obis` makes it possible to track data on a file system, +where users have complete freedom to structure and manipulate the data as they wish, while retaining +the benefits of openBIS. With `obis`, only metadata is actually stored and managed by openBIS. The +data itself is managed externally, by the user, but openBIS is aware of its existence and the data +can be used for provenance tracking. `obis` is packaged as a stand-alone utility, which, to be +available, only needs to be added to the `PATH` variable in a UNIX or UNIX-like environment. + +Under the covers, `obis` takes advantage of publicly available and tested tools to manage data on +the file system. In particular, it uses `git` and `git-annex` to track the content of a dataset. +Using `git-annex`, even large binary artifacts can be tracked efficiently. For communication with +openBIS, `obis` uses the openBIS API, which offers the power to register and track all metadata +supported by openBIS. + +## 7. Literature + +V. Korolev, A. Joshi, V. Korolev, M.A. Grasso, A. Joshi, M.A. Grasso, et al., "PROB: A tool for +tracking provenance and reproducibility of big data experiments", Reproduce '14. HPCA 2014, vol. 11, +pp. 264-286, 2014. 
http://ebiquity.umbc.edu/_file_directory_/papers/693.pdf diff --git a/app-openbis-command-line/src/python/obis/dm/data_mgmt.py b/app-openbis-command-line/src/python/obis/dm/data_mgmt.py index 0704af6505a4173939f3df386dccb90df73cb2c8..96e5e902c970764a97ffd010c3525a5ec9048c44 100644 --- a/app-openbis-command-line/src/python/obis/dm/data_mgmt.py +++ b/app-openbis-command-line/src/python/obis/dm/data_mgmt.py @@ -584,7 +584,6 @@ class GitDataMgmt(AbstractDataMgmt): assert prop is not None assert value is not None elif operation_type is OperationType.GET: - assert prop is not None assert value is None elif operation_type is OperationType.CLEAR: assert value is None @@ -684,7 +683,6 @@ class PhysicalDataMgmt(AbstractDataMgmt): assert prop is not None assert value is not None elif operation_type is OperationType.GET: - assert prop is not None assert value is None elif operation_type is OperationType.CLEAR: self.error_raise(f"{category} clear", @@ -696,6 +694,9 @@ class PhysicalDataMgmt(AbstractDataMgmt): elif category == "collection": cmd = Collection(self, operation_type, prop, value) return cmd.run() - else: + elif category == "config": return self.update_config(resolver, self.debug, is_global, is_data_set_property, operation_type, prop, value) + else: + self.error_raise(f"{category} {operation_type}", + "This command is only available for External Manager Data") diff --git a/app-openbis-command-line/src/python/obis/scripts/cli.py b/app-openbis-command-line/src/python/obis/scripts/cli.py index c07666cd593c8980bddf95219c08695bd3125c97..e1259736b4c87f82c0ac0e953073c4ae87ca092e 100644 --- a/app-openbis-command-line/src/python/obis/scripts/cli.py +++ b/app-openbis-command-line/src/python/obis/scripts/cli.py @@ -274,8 +274,8 @@ _search_params = [ click.option('-space', '--space', default=None, help='Space code'), click.option('-project', '--project', default=None, help='Full project identification code'), click.option('-experiment', '--experiment', default=None, help='Full 
experiment code'), - click.option('-property', '--property', 'property_code', default=None, help='Property code'), - click.option('-property-value', '--property-value', 'property_value', default=None, + click.option('-property', 'property_code', default=None, help='Property code'), + click.option('-property-value', 'property_value', default=None, help='Property value'), click.option('-save', '--save', default=None, help='Filename to save results'), ]