Newer
Older
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright ETH 2018 - 2023 Zürich, Scientific IT Services
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import abc
import os
import shutil
import signal
import sys
import requests
from .command_result import CommandResult
from .commands.addref import Addref
from .commands.clone import Clone
from .commands.download import Download
from .commands.openbis_sync import OpenbisSync
from .commands.removeref import Removeref
from .utils import Type
from .utils import cd
from .utils import complete_git_config
from .utils import complete_openbis_config
from .utils import default_echo
# noinspection PyPep8Naming
def DataMgmt(echo_func=None, settings_resolver=None, openbis_config={}, git_config={},
openbis=None, log=None, debug=False, login=True, repository_type=Type.UNKNOWN):
"""Factory method for DataMgmt instances"""
echo_func = echo_func if echo_func is not None else default_echo
data_path = git_config['data_path']
metadata_path = git_config['metadata_path']
invocation_path = git_config['invocation_path']
if settings_resolver is None:
settings_resolver = dm_config.SettingsResolver()
if repository_type == Type.UNKNOWN:
if os.path.exists('.obis'):
config_dict = settings_resolver.config.config_dict()
if config_dict['is_physical'] is True:
repository_type = Type.PHYSICAL
else:
repository_type = Type.LINK
else:
repository_type = Type.LINK
if repository_type == Type.PHYSICAL:
return PhysicalDataMgmt(settings_resolver, None, None, openbis, log, data_path, metadata_path, invocation_path)
else:
complete_git_config(git_config)
git_wrapper = GitWrapper(**git_config)
if not git_wrapper.can_run():
# TODO We could just as well throw an error here instead of creating
# creating the NoGitDataMgmt which will fail later.
return NoGitDataMgmt(settings_resolver, None, git_wrapper, openbis, log, data_path, metadata_path, invocation_path)
complete_openbis_config(openbis_config, settings_resolver)
return GitDataMgmt(settings_resolver, openbis_config, git_wrapper, openbis, log, data_path, metadata_path, invocation_path, debug, login)
class AbstractDataMgmt(metaclass=abc.ABCMeta):
"""Abstract object that implements operations.
All operations throw an exepction if they fail.
"""
def __init__(self, settings_resolver, openbis_config, git_wrapper, openbis, log, data_path, metadata_path, invocation_path, debug=False, login=True):
self.settings_resolver = settings_resolver
Yves Noirjean
committed
self.openbis_config = openbis_config
self.git_wrapper = git_wrapper
Yves Noirjean
committed
self.openbis = openbis
self.data_path = data_path
self.metadata_path = metadata_path
self.invocation_path = invocation_path

yvesn
committed
self.debug = debug
# setting flags
self.restore_on_sigint = True
self.ignore_missing_parent = False
def error_raise(self, command, reason):
message = "'{}' failed. {}".format(command, reason)
raise ValueError(message)
@abc.abstractmethod
def get_settings_resolver(self):
""" Get the settings resolver """
return
@abc.abstractmethod
def setup_local_settings(self, all_settings):
""" Setup local settings - for using obis as a library """
return
@abc.abstractmethod
def init_data(self, desc=None):
"""Initialize a data repository at the path with the description.
:param path: Path for the repository.
:param desc: An optional short description of the repository (used by git-annex)
Chandrasekhar Ramakrishnan
committed
:param create: If True and the folder does not exist, create it. Defaults to true.
:return: A CommandResult.
"""
return
@abc.abstractmethod
def init_analysis(self, parent_folder, desc=None):
"""Initialize an analysis repository at the path.
:param parent_folder: Path for the repository.
:param parent: (required when outside of existing repository) Path for the parent repositort
:return: A CommandResult.
"""
return
@abc.abstractmethod
def commit(self, msg, auto_add=True, sync=True):
"""Commit the current repo.
This issues a git commit and connects to openBIS and creates a data set in openBIS.
Chandrasekhar Ramakrishnan
committed
:param msg: Commit message.
:param auto_add: Automatically add all files in the folder to the repo. Defaults to True.
:param sync: If true, sync with openBIS server.
:return: A CommandResult.
"""
@abc.abstractmethod
def sync(self):
This connects to openBIS and creates a data set in openBIS.
:return: A CommandResult.
"""
return
@abc.abstractmethod
def status(self):
"""Return the status of the current repository.
:return: A CommandResult.
"""
return
def clone(self, data_set_id, ssh_user, content_copy_index, skip_integrity_check):
"""Clone / copy a repository related to the given data set id.
:param data_set_id:
:param ssh_user: ssh user for remote system (optional)
:param content_copy_index: index of content copy in case there are multiple copies (optional)
:param skip_integrity_check: if true, the file checksums will not be checked
:return: A CommandResult.
"""
return
@abc.abstractmethod
def move(self, data_set_id, ssh_user, content_copy_index, skip_integrity_check):
"""Move a repository related to the given data set id.
:param data_set_id:
:param ssh_user: ssh user for remote system (optional)
:param content_copy_index: index of content copy in case there are multiple copies (optional)
:param skip_integrity_check: if true, the file checksums will not be checked
:return: A CommandResult.
"""
return
def addref(self):
"""Add the current folder as an obis repository to openBIS.
:return: A CommandResult.
"""
return
def removeref(self, data_set_id=None):
"""Remove the current folder / repository from openBIS.
:param data_set_id: Id of the data from which a reference should be removed.
:return: A CommandResult.
"""
return
def download(self, data_set_id, content_copy_index, file, skip_integrity_check):
"""Download files of a repository without adding a content copy.
:param data_set_id: Id of the data set to download from.
:param content_copy_index: Index of the content copy to download from.
:param file: Path of a file in the data set to download. All files are downloaded if it is None.
:param skip_integrity_check: Checksums of files are not verified if true.
"""
class NoGitDataMgmt(AbstractDataMgmt):
"""DataMgmt operations when git is not available -- show error messages."""
def get_settings_resolver(self):
self.error_raise("get settings resolver", "No git command found.")
def setup_local_settings(self, all_settings):
self.error_raise("setup local settings", "No git command found.")
def init_data(self, desc=None):
self.error_raise("init data", "No git command found.")
def init_analysis(self, parent_folder, desc=None):
self.error_raise("init analysis", "No git command found.")
def commit(self, msg, auto_add=True, sync=True):
self.error_raise("commit", "No git command found.")
def sync(self):
self.error_raise("sync", "No git command found.")
self.error_raise("status", "No git command found.")
def clone(self, data_set_id, ssh_user, content_copy_index, skip_integrity_check):
self.error_raise("clone", "No git command found.")
def move(self, data_set_id, ssh_user, content_copy_index, skip_integrity_check):
self.error_raise("move", "No git command found.")
def addref(self):
self.error_raise("addref", "No git command found.")
def removeref(self, data_set_id=None):
self.error_raise("removeref", "No git command found.")
def download(self, data_set_id, content_copy_index, file, skip_integrity_check):
self.error_raise("download", "No git command found.")
def restore_signal_handler(data_mgmt):
data_mgmt.restore()
sys.exit(0)
""" To be used with commands that use the CommandLog. """
def f_with_log(self, *args):
try:
result = f(self, *args)
except Exception as e:
self.log.log_error(str(e))
raise e
if result.failure() == False:
self.log.success()
else:
self.log.log_error(result.output)
return result
return f_with_log
def with_restore(f):
""" Sets the restore point and restores on error. """
def f_with_restore(self, *args):
self.set_restorepoint()
try:
if self.restore_on_sigint:
signal.signal(signal.SIGINT, lambda signal, frame: restore_signal_handler(self))
result = f(self, *args)
if result.failure():
self.restore()
self.clear_restorepoint()
return result
except Exception as e:
self.restore()

yvesn
committed
if self.debug == True:
raise e
self.clear_restorepoint()
return CommandResult(returncode=-1, output="Error: " + str(e))
return f_with_restore
class GitDataMgmt(AbstractDataMgmt):
"""DataMgmt operations in normal state."""
def get_settings_resolver(self, relative_path=None):
if relative_path is None:
return self.settings_resolver
else:
settings_resolver = dm_config.SettingsResolver()
settings_resolver.set_resolver_location_roots('data_set', relative_path)
return settings_resolver
# TODO add this to abstract / other class
def setup_local_settings(self, all_settings):
self.settings_resolver.set_resolver_location_roots('data_set', '.')
for resolver_type, settings in all_settings.items():
resolver = getattr(self.settings_resolver, resolver_type)
for key, value in settings.items():
resolver.set_value_for_parameter(key, value, 'local')
Yves Noirjean
committed
def get_data_set_id(self, relative_path):
settings_resolver = self.get_settings_resolver(relative_path)
return settings_resolver.repository.config_dict().get('data_set_id')
def get_repository_id(self, relative_path):
settings_resolver = self.get_settings_resolver(relative_path)
return settings_resolver.repository.config_dict().get('id')
def init_data(self, desc=None):
# check that repository does not already exist
# TODO remove .git check after physical flow is implemented
if os.path.exists('.obis') and os.path.exists('.git'):
return CommandResult(returncode=-1, output="Folder is already an obis repository.")
return result
git_annex_backend = self.settings_resolver.config.config_dict().get('git_annex_backend')
result = self.git_wrapper.git_annex_init(desc, git_annex_backend)
return result
result = self.git_wrapper.initial_commit()
if result.failure():
return result
# Update the resolvers location
self.settings_resolver.set_resolver_location_roots('data_set', '.')
self.settings_resolver.copy_global_to_local()
def init_analysis(self, parent_folder, desc=None):
# get data_set_id of parent from current folder or explicit parent argument
parent_data_set_id = self.get_data_set_id(parent_folder)
# check that parent repository has been added to openBIS
if self.get_repository_id(parent_folder) is None:
return CommandResult(returncode=-1, output="Parent data set must be committed to openBIS before creating an analysis data set.")
# init analysis repository
result = self.init_data(desc)
if result.failure():
return result
# add analysis repository folder to .gitignore of parent
parent_folder_abs = os.path.join(os.getcwd(), parent_folder)
analysis_folder_abs = os.getcwd()
if Path(analysis_folder_abs) in Path(parent_folder_abs).parents:
analysis_folder_relative = os.path.relpath(analysis_folder_abs, parent_folder_abs)
with cd(parent_folder):
self.git_wrapper.git_ignore(analysis_folder_relative)
# set data_set_id to analysis repository so it will be used as parent when committing
self.set_property(self.settings_resolver.repository, "data_set_id", parent_data_set_id, False, False)
@with_restore
def sync(self):
return self._sync()
def _sync(self):
cmd = OpenbisSync(self, self.ignore_missing_parent)
Yves Noirjean
committed
@with_restore
def commit(self, msg, auto_add=True, sync=True):
""" Git add, commit and sync with openBIS. """
Chandrasekhar Ramakrishnan
committed
if auto_add:
result = self.git_wrapper.git_top_level_path()
Chandrasekhar Ramakrishnan
committed
return result
result = self.git_wrapper.git_add(result.output)
Chandrasekhar Ramakrishnan
committed
return result
result = self.git_wrapper.git_commit(msg)
# TODO If no changes were made check if the data set is in openbis. If not, just sync.
return result
result = self._sync()
return result
git_status = self.git_wrapper.git_status()
try:
sync_status = OpenbisSync(self).run(info_only=True)
except requests.exceptions.ConnectionError:
sync_status = CommandResult(returncode=-1, output="Could not connect to openBIS.")
output = git_status.output
if sync_status.failure():
if len(output) > 0:
output += '\n'
output += sync_status.output
return CommandResult(returncode=0, output=output)
""" Stores the git commit hash and copies the obis metadata. """
self.previous_git_commit_hash = self.git_wrapper.git_commit_hash().output
self.clear_restorepoint()
shutil.copytree('.obis', '.obis_restorepoint')
""" Resets to the stored git commit hash and restores the copied obis metadata. """
self.git_wrapper.git_reset_to(self.previous_git_commit_hash)
shutil.rmtree('.obis')
shutil.copytree('.obis_restorepoint', '.obis')
def clear_restorepoint(self):
""" Deletes the obis metadata copy. This must always be done. """
if os.path.exists('.obis_restorepoint'):
shutil.rmtree('.obis_restorepoint')
Chandrasekhar Ramakrishnan
committed
def clone(self, data_set_id, ssh_user, content_copy_index, skip_integrity_check):
cmd = Clone(self, data_set_id, ssh_user, content_copy_index, skip_integrity_check)
def move(self, data_set_id, ssh_user, content_copy_index, skip_integrity_check):
cmd = Move(self, data_set_id, ssh_user, content_copy_index, skip_integrity_check)
return cmd.run()
cmd = Addref(self)
return cmd.run()
def removeref(self, data_set_id=None):
cmd = Removeref(self, data_set_id=data_set_id)
def download(self, data_set_id, content_copy_index, file, skip_integrity_check):
cmd = Download(self, data_set_id, content_copy_index, file, skip_integrity_check)
def config(self, category, is_global, is_data_set_property, prop=None, value=None, set=False, get=False, clear=False):
"""
:param category: config, object, collection, data_set or repository
:param is_global: act on global settings - local if false
:param is_data_set_property: true if prop / value are a data set property
:param prop: setting key
:param value: setting value
:param set: True for setting values
:param get: True for getting values
:param clear: True for clearing values
"""
resolver = self.settings_resolver.get(category)
if resolver is None:
raise ValueError('Invalid settings category: ' + category)
if set == True:
assert get == False
assert clear == False
assert prop is not None
assert value is not None
elif get == True:
assert set == False
assert clear == False
assert value is None
elif clear == True:
assert get == False
assert set == False
assert value is None
assert set == True or get == True or clear == True
if is_global:
resolver.set_location_search_order(['global'])
else:
resolver.set_location_search_order(['local'])
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
config_dict = resolver.config_dict()
if is_data_set_property:
config_dict = config_dict['properties']
if get == True:
if prop is None:
config_str = json.dumps(config_dict, indent=4, sort_keys=True)
click_echo("{}".format(config_str), with_timestamp=False)
else:
if not prop in config_dict:
raise ValueError("Unknown setting {} for {}.".format(prop, resolver.categoty))
little_dict = {prop: config_dict[prop]}
config_str = json.dumps(little_dict, indent=4, sort_keys=True)
click_echo("{}".format(config_str), with_timestamp=False)
elif set == True:
return check_result("config", self.set_property(resolver, prop, value, is_global, is_data_set_property))
elif clear == True:
if prop is None:
returncode = 0
for prop in config_dict.keys():
returncode += check_result("config", self.set_property(resolver, prop, None, is_global, is_data_set_property))
return returncode
else:
return check_result("config", self.set_property(resolver, prop, None, is_global, is_data_set_property))
def set_property(self, resolver, prop, value, is_global, is_data_set_property=False):
"""Helper function to implement the property setting semantics."""
loc = 'global' if is_global else 'local'
try:
if is_data_set_property:
resolver.set_value_for_json_parameter('properties', prop, value, loc, apply_rules=True)
else:
resolver.set_value_for_parameter(prop, value, loc, apply_rules=True)
except Exception as e:
if self.debug == True:
raise e
return CommandResult(returncode=-1, output="Error: " + str(e))
else:
return CommandResult(returncode=0, output="")
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
class PhysicalDataMgmt(AbstractDataMgmt):
"""DataMgmt operations for DSS-stored data."""
def get_settings_resolver(self):
return dm_config.SettingsResolver()
def setup_local_settings(self, all_settings):
self.error_raise("setup local settings", "Not implemented.")
def init_data(self, desc=None):
if os.path.exists('.obis'):
return CommandResult(returncode=-1, output="Folder is already an obis repository.")
self.settings_resolver.set_resolver_location_roots('data_set', '.')
self.settings_resolver.copy_global_to_local()
self.settings_resolver.config.set_value_for_parameter("is_physical", True, "local")
return CommandResult(returncode=0, output="Physical obis repository initialized!")
def init_analysis(self, parent_folder, desc=None):
self.error_raise("init analysis", "Not implemented.")
def commit(self, msg, auto_add=True, sync=True):
self.error_raise("commit", "Not implemented.")
def sync(self):
self.error_raise("sync", "Not implemented.")
def status(self):
self.error_raise("status", "Not implemented.")
def clone(self, data_set_id, ssh_user, content_copy_index, skip_integrity_check):
self.error_raise("clone", "Not implemented.")
def move(self, data_set_id, ssh_user, content_copy_index, skip_integrity_check):
self.error_raise("move", "Not implemented.")
def addref(self):
self.error_raise("addref", "Not implemented.")
def removeref(self, data_set_id=None):
self.error_raise("removeref", "Not implemented.")
def download(self, data_set_id, content_copy_index, file, skip_integrity_check):
self.error_raise("download", "Not implemented.")