Skip to content
Snippets Groups Projects
Commit 1745542e authored by Adam Laskowski
Browse files

SSDM-13300: Change how data_set search command works, added functionality to...

SSDM-13300: Change how data_set search command works, added functionality to download command to read data from the input csv file. Fixed documentation.
parent c0bde061
No related branches found
No related tags found
1 merge request: !40 SSDM-13578 : 2PT : Database and V3 Implementation - include the new AFS "free"...
......@@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
import csv
import os
from .openbis_command import OpenbisCommand
......@@ -26,12 +27,16 @@ class DownloadPhysical(OpenbisCommand):
Command to download physical files of a data set.
"""
def __init__(self, dm, data_set_id, file, skip_integrity_check):
def __init__(self, dm, data_set_id, from_file, file, skip_integrity_check):
"""
:param dm: data management
:param data_set_id: permId of the data set to be cloned
:param dm: data management.
:param data_set_id: permId of the data set to be cloned.
:param from_file: Path to a CSV file with a list of datasets to download.
:param file: path to a specific file to download from a dataset.
:param skip_integrity_check: boolean flag indicating whether to skip checksum validation.
"""
self.data_set_id = data_set_id
self.from_file = from_file
self.files = [file] if file is not None else None
self.skip_integrity_check = skip_integrity_check
self.load_global_config(dm)
......@@ -42,25 +47,37 @@ class DownloadPhysical(OpenbisCommand):
return CommandResult(returncode=-1,
output="Configuration fileservice_url needs to be set for download.")
data_set = self.openbis.get_dataset(self.data_set_id)
files = self.files if self.files is not None else data_set.file_list
if self.from_file is not None:
with cd(self.data_mgmt.invocation_path):
with open(self.from_file, newline='') as csvfile:
reader = csv.DictReader(csvfile)
for row in reader:
self.download_dataset(row['permId'])
if reader.line_num == 1: # First row contains headers
click_echo("No data sets were found in provided file!")
else:
self.download_dataset(self.data_set_id)
return CommandResult(returncode=0, output="Download completed.")
def download_dataset(self, perm_id):
data_set = self.openbis.get_dataset(perm_id)
files = self.files if self.files is not None else data_set.file_list
with cd(self.data_mgmt.invocation_path):
target_folder = data_set.download(files, destination=self.data_mgmt.invocation_path)
if self.skip_integrity_check is not True:
invalid_files = validate_checksum(self.openbis, files, data_set.permId,
target_folder, None)
self.redownload_invalid_files_on_demand(invalid_files, target_folder)
return CommandResult(returncode=0, output="Files downloaded to: %s" % target_folder)
self.redownload_invalid_files_on_demand(invalid_files, target_folder, perm_id)
click_echo(f"Files from dataset {perm_id} has been downloaded to {target_folder}")
def redownload_invalid_files_on_demand(self, invalid_files, target_folder):
def redownload_invalid_files_on_demand(self, invalid_files, target_folder, perm_id):
if len(invalid_files) == 0:
return
yes_or_no = None
while yes_or_no != "yes" and yes_or_no != "no":
click_echo("Integrity check failed for following files:\n" +
click_echo(f"Integrity check failed for following files in dataset {perm_id}:\n" +
str(invalid_files) + "\n" +
"Either the download failed or the files where changed after committing to openBIS.\n" +
"Either the download failed or the files where changed in the OpenBIS.\n" +
"Should the files be downloaded again? (yes/no)")
yes_or_no = input('> ')
if yes_or_no == "yes":
......@@ -68,4 +85,4 @@ class DownloadPhysical(OpenbisCommand):
filename_dest = os.path.join(target_folder, file)
os.remove(filename_dest)
self.files = invalid_files
return self.run()
return self.download_dataset(perm_id)
......@@ -13,8 +13,6 @@
# limitations under the License.
#
import os
from .openbis_command import OpenbisCommand
from ..command_result import CommandResult
from ..utils import cd
......@@ -84,7 +82,7 @@ class Search(OpenbisCommand):
self.property_code: self.property_value,
}
search_results = self.openbis.get_samples(
datasets = self.openbis.get_datasets(
space=self.space,
project=self.project, # Not Supported with Project Samples disabled
experiment=self.experiment,
......@@ -94,43 +92,11 @@ class Search(OpenbisCommand):
props="*" # Fetch all properties
)
collections = self.openbis.get_collections(
space=self.space,
project=self.project,
type=self.type_code,
where=properties,
props="*" # Fetch all properties
)
click_echo("Looking for data sets")
datasets = []
perm_ids = set()
for sample in search_results:
ds = sample.get_datasets()
for ds_object in ds.objects:
datasets += [ds_object] if ds_object.permId not in perm_ids else []
perm_ids.add(ds_object.permId)
for collection in collections:
ds = collection.get_datasets()
for ds_object in ds.objects:
datasets += [ds_object] if ds_object.permId not in perm_ids else []
perm_ids.add(ds_object.permId)
click_echo(f"Data sets found: {len(datasets)}")
if self.save_path is not None:
click_echo(f"Saving search results in {self.save_path}")
with cd(self.data_mgmt.invocation_path):
if os.path.exists(self.save_path) is True and os.path.isdir(
self.save_path) is False:
return CommandResult(returncode=-1,
output=f"File {self.save_path} is not a directory")
if os.path.isdir(self.save_path) is False:
click_echo(f"Creating directory {self.save_path}")
os.makedirs(self.save_path)
click_echo(
f"Saving search results in {os.path.join(self.data_mgmt.invocation_path, self.save_path)}")
for dataset in datasets:
dataset.download(destination=self.save_path,
linked_dataset_fileservice_url=self.fileservice_url() + "/download")
datasets.df.to_csv(self.save_path, index=False)
else:
click_echo(f"Search results:\n{datasets}")
......
......@@ -210,9 +210,10 @@ class AbstractDataMgmt(metaclass=abc.ABCMeta):
return
@abc.abstractmethod
def download(self, data_set_id, file, skip_integrity_check):
def download(self, data_set_id, from_file, file, skip_integrity_check):
"""Download files of a repository without adding a content copy.
:param data_set_id: Id of the data set to download from.
:param from_file: Path of a file with a list of datasets to download.
:param file: Path of a file in the data set to download. All files are downloaded if it is None.
:param skip_integrity_check: Checksums of files are not verified if true.
"""
......@@ -349,7 +350,7 @@ class NoGitDataMgmt(AbstractDataMgmt):
# removeref for the no-git data management variant (hunk header: NoGitDataMgmt).
# The body does nothing except pass the command name "removeref" and the
# message "No git command found." to error_raise; data_set_id is not used.
# NOTE(review): error_raise is defined outside this view — presumably it logs
# and/or raises; confirm before relying on a return value.
def removeref(self, data_set_id=None):
self.error_raise("removeref", "No git command found.")
def download(self, data_set_id, file, skip_integrity_check):
def download(self, *_):
self.error_raise("download", "No git command found.")
def search_object(self, *_):
......@@ -559,7 +560,7 @@ class GitDataMgmt(AbstractDataMgmt):
cmd = Removeref(self, data_set_id=data_set_id)
return cmd.run()
def download(self, data_set_id, file, skip_integrity_check):
def download(self, data_set_id, from_file, file, skip_integrity_check):
self.error_raise("download", "This command is only available for Manager Data.")
#
......@@ -645,8 +646,8 @@ class PhysicalDataMgmt(AbstractDataMgmt):
# removeref for the physical data management variant (hunk header:
# PhysicalDataMgmt). The body only passes the command name "removeref" and an
# "only available for External Manager Data" message to error_raise;
# data_set_id is not used.
# NOTE(review): error_raise is defined outside this view — presumably it logs
# and/or raises; confirm before relying on a return value.
def removeref(self, data_set_id=None):
self.error_raise("removeref", "This command is only available for External Manager Data")
def download(self, data_set_id, file, skip_integrity_check):
cmd = DownloadPhysical(self, data_set_id, file, skip_integrity_check)
def download(self, data_set_id, from_file, file, skip_integrity_check):
cmd = DownloadPhysical(self, data_set_id, from_file, file, skip_integrity_check)
return cmd.run()
def upload(self, sample_id, data_set_type, files):
......
......@@ -275,6 +275,7 @@ _search_params = [
click.option('-space', '--space', default=None, help='Space code'),
click.option('-project', '--project', default=None, help='Full project identification code'),
click.option('-experiment', '--experiment', default=None, help='Full experiment code'),
click.option('-type', '--type', 'type_code', default=None, help='Type code'),
click.option('-property', 'property_code', default=None, help='Property code'),
click.option('-property-value', 'property_value', default=None,
help='Property value'),
......@@ -758,7 +759,10 @@ def removeref(ctx, data_set_id, repository):
# download
_download_params = [
click.argument('data_set_id'),
click.argument('data_set_id', required=False),
click.option('-from-file', '--from-file', 'from_file',
help='An output .CSV file from `obis data_set search` command with the list of' +
' objects to download datasets from'),
click.option(
'-f', '--file', help='File in the data set to download - downloading all if not given.'),
click.option('-s', '--skip_integrity_check', default=False, is_flag=True,
......@@ -766,22 +770,21 @@ _download_params = [
]
# Registers "download" on the data_set command group, with the shared
# _download_params applied through add_params. The handler gives the runner
# stored in ctx.obj['runner'] a callback that calls dm.download() with the
# parsed CLI values, and returns whatever the runner's run() returns.
# NOTE(review): from_file is not forwarded here, although the download()
# signatures shown elsewhere on this page accept it — presumably this is the
# pre-change version of the command; confirm before reusing it.
@data_set.command("download", short_help="Download files of a data set.")
@add_params(_download_params)
@click.pass_context
def data_set_download(ctx, file, data_set_id, skip_integrity_check):
return ctx.obj['runner'].run("download",
lambda dm: dm.download(data_set_id=data_set_id, file=file,
skip_integrity_check=skip_integrity_check))
@cli.command("download", short_help="Download files of a data set.")
@add_params(_download_params)
@click.pass_context
def download(ctx, file, data_set_id, skip_integrity_check):
def download(ctx, data_set_id, from_file, file, skip_integrity_check):
""" Downloads dataset files from OpenBIS instance.\n
DATA_SET Unique identifier of dataset within OpenBIS instance."""
if (data_set_id is None and from_file is None) or (
data_set_id is not None and from_file is not None):
click_echo("'data_set_id' or 'from_file' must be provided!")
return -1
ctx.obj['runner'] = DataMgmtRunner(ctx.obj, halt_on_error_log=False)
ctx.invoke(data_set_download, file=file,
data_set_id=data_set_id, skip_integrity_check=skip_integrity_check)
return ctx.obj['runner'].run("download",
lambda dm: dm.download(data_set_id=data_set_id,
from_file=from_file, file=file,
skip_integrity_check=skip_integrity_check))
# upload
......@@ -795,20 +798,17 @@ _upload_params = [
]
# Registers "upload" on the data_set command group, with the shared
# _upload_params applied through add_params. The handler gives the runner
# stored in ctx.obj['runner'] a callback that calls
# dm.upload(sample_id, data_set_type, files), and returns whatever the
# runner's run() returns.
# NOTE(review): the runner must already be present in ctx.obj; this function
# does not create it — verify against the caller.
@data_set.command("upload", short_help="Upload files to form a data set.")
@add_params(_upload_params)
@click.pass_context
def data_set_upload(ctx, sample_id, data_set_type, files):
return ctx.obj['runner'].run("upload",
lambda dm: dm.upload(sample_id, data_set_type, files))
@cli.command("upload", short_help="Upload files to form a data set.")
@add_params(_upload_params)
@click.pass_context
def download(ctx, sample_id, data_set_type, files):
def upload(ctx, sample_id, data_set_type, files):
""" Creates data set under object and upload files to it.\n
SAMPLE_ID Unique identifier an object in OpenBIS.\n
DATA_SET_TYPE Newly created data set type.
"""
ctx.obj['runner'] = DataMgmtRunner(ctx.obj, halt_on_error_log=False)
ctx.invoke(data_set_upload, files=files, sample_id=sample_id, data_set_type=data_set_type)
ctx.obj['runner'].run("upload",
lambda dm: dm.upload(sample_id, data_set_type, files))
# clone
......
......@@ -204,6 +204,7 @@ Options:
-space, --space TEXT Space code
-project, --project TEXT Full project identification code
-experiment, --experiment TEXT Full experiment code
-type, --type TEXT Type code
-property TEXT Property code
-property-value TEXT Property value
-save, --save TEXT Directory name to save results
......@@ -211,7 +212,7 @@ Options:
With `data_set search` command, obis connects to a configured OpenBIS instance and searches for all
data sets that fulfill given filtering criteria.
At least one filtering criteria must be specified. Resulting data set files can be downloaded by
At least one filtering criterion must be specified. Search results can be downloaded by
using the `save` option.
*Note: Filtering by `-project` may not work when `Project Samples` are disabled in OpenBIS
......@@ -221,6 +222,15 @@ configuration.*
```
obis download [options] [data_set_id]
Options:
-from-file, --from-file TEXT An output .CSV file from `obis data_set search`
command with the list of objects to download
data sets from
-f, --file TEXT File in the data set to download - downloading
all if not given.
-s, --skip_integrity_check Flag to skip file integrity check with
checksums
```
The `download` command downloads the files of a given data set from the OpenBIS instance specified
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment