diff --git a/src/python/OBis/obis/dm/command_result.py b/src/python/OBis/obis/dm/command_result.py new file mode 100644 index 0000000000000000000000000000000000000000..18e80abfdffbee701f791e39f7537dcbc69467b3 --- /dev/null +++ b/src/python/OBis/obis/dm/command_result.py @@ -0,0 +1,23 @@ +class CommandResult(object): + """Encapsulate result from a subprocess call.""" + + def __init__(self, completed_process=None, returncode=None, output=None): + """Convert a completed_process object into a ShellResult.""" + if completed_process: + self.returncode = completed_process.returncode + self.output = completed_process.stdout.decode('utf-8').strip() + else: + self.returncode = returncode + self.output = output + + def __str__(self): + return "CommandResult({},{})".format(self.returncode, self.output) + + def __repr__(self): + return "CommandResult({},{})".format(self.returncode, self.output) + + def success(self): + return self.returncode == 0 + + def failure(self): + return not self.success() \ No newline at end of file diff --git a/src/python/OBis/obis/dm/commands/__init__.py b/src/python/OBis/obis/dm/commands/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/python/OBis/obis/dm/commands/addref.py b/src/python/OBis/obis/dm/commands/addref.py new file mode 100644 index 0000000000000000000000000000000000000000..9d88a8537009916716f5cc13d48aa1b4c0814a43 --- /dev/null +++ b/src/python/OBis/obis/dm/commands/addref.py @@ -0,0 +1,49 @@ +import os +from .openbis_command import OpenbisCommand +from ..command_result import CommandResult +from ..utils import complete_openbis_config + + +class Addref(OpenbisCommand): + """ + Command to add the current folder, which is supposed to be an obis repository, as + a new content copy to openBIS. + """ + + def __init__(self, dm): + super(Addref, self).__init__(dm) + + + def run(self): + self.update_external_dms_id() + result = self.check_obis_repository() + if result.failure(): + return result + self.openbis.new_content_copy(self.path(), self.commit_id(), self.repository_id(), self.external_dms_id(), self.data_set_id()) + return CommandResult(returncode=0, output="") + + + def update_external_dms_id(self): + self.config_dict['external_dms_id'] = None + self.prepare_external_dms() + + + def check_obis_repository(self): + if os.path.exists('.obis'): + return CommandResult(returncode=0, output="") + else: + return CommandResult(returncode=-1, output="This is not an obis repository.") + + + def path(self): + result = self.git_wrapper.git_top_level_path() + if result.failure(): + return result + return result.output + + + def commit_id(self): + result = self.git_wrapper.git_commit_hash() + if result.failure(): + return result + return result.output diff --git a/src/python/OBis/obis/dm/commands/clone.py b/src/python/OBis/obis/dm/commands/clone.py new file mode 100644 index 0000000000000000000000000000000000000000..77df6316bf3af934c5358aaafd74e231cb964e55 --- /dev/null +++ b/src/python/OBis/obis/dm/commands/clone.py @@ -0,0 +1,137 @@ +import socket +import os +import pybis +from .openbis_command import OpenbisCommand +from ..command_result import CommandResult +from ..utils import cd +from ..utils import run_shell +from ..utils import complete_openbis_config +from .. import config as dm_config +from ...scripts.cli import shared_data_mgmt +from ... import dm + + +class Clone(OpenbisCommand): + """ + Implements the clone command. Copies a repository from a content copy of a data set + and adds the local copy as a new content copy using the addref command. + """ + + def __init__(self, dm, data_set_id, ssh_user, content_copy_index): + self.data_set_id = data_set_id + self.ssh_user = ssh_user + self.content_copy_index = content_copy_index + self.load_global_config(dm) + super(Clone, self).__init__(dm) + + + def load_global_config(self, dm): + """ + Use global config only. + """ + resolver = dm_config.ConfigResolver() + config = {} + complete_openbis_config(config, resolver, False) + dm.openbis_config = config + + + def check_configuration(self): + missing_config_settings = [] + if self.openbis is None: + missing_config_settings.append('openbis_url') + if self.user() is None: + missing_config_settings.append('user') + if len(missing_config_settings) > 0: + return CommandResult(returncode=-1, + output="Missing configuration settings for {}.".format(missing_config_settings)) + return CommandResult(returncode=0, output="") + + + def run(self): + + result = self.prepare_run() + if result.failure(): + return result + + data_set = self.openbis.get_dataset(self.data_set_id) + + content_copy = self.get_content_copy(data_set) + host = content_copy['externalDms']['address'].split(':')[0] + path = content_copy['path'] + repository_folder = path.split('/')[-1] + + result = self.copy_repository(self.ssh_user, host, path) + if result.failure(): + return result + result = self.checkout_commit(content_copy, path) + if result.failure(): + return result + return self.add_content_copy_to_openbis(repository_folder) + + + def get_content_copy(self, data_set): + if data_set.data['kind'] != 'LINK': + raise ValueError('Data set is of type ' + data_set.data['kind'] + ' but should be LINK.') + content_copies = data_set.data['linkedData']['contentCopies'] + if len(content_copies) == 0: + raise ValueError("Data set has no content copies.") + elif len(content_copies) == 1: + return content_copies[0] + else: + return self.select_content_copy(content_copies) + + + def select_content_copy(self, content_copies): + if self.content_copy_index is not None: + # use provided content_copy_index + if self.content_copy_index > 0 and self.content_copy_index <= len(content_copies): + return content_copies[self.content_copy_index-1] + else: + raise ValueError("Invalid content copy index.") + else: + # ask user + while True: + print('From which content copy do you want to clone?') + for i, content_copy in enumerate(content_copies): + host = content_copy['externalDms']['address'].split(":")[0] + path = content_copy['path'] + print(" {}) {}:{}".format(i+1, host, path)) + + copy_index_string = input('> ') + if copy_index_string.isdigit(): + copy_index_int = int(copy_index_string) + if copy_index_int > 0 and copy_index_int <= len(content_copies): + return content_copies[copy_index_int-1] + + + def copy_repository(self, ssh_user, host, path): + # abort if local folder already exists + repository_folder = path.split('/')[-1] + if os.path.exists(repository_folder): + return CommandResult(returncode=-1, output="Folder for repository to clone already exists: " + repository_folder) + # check if local or remote + if host == socket.gethostname(): + location = path + else: + location = ssh_user + "@" if ssh_user is not None else "" + location += host + ":" + path + # copy repository + return run_shell(["rsync", "--progress", "-av", location, "."]) + + + def checkout_commit(self, content_copy, path): + """ + Checks out the commit of the content copy from which the clone was made + in case there are newer commits / data sets. So the new copy is based on the + data set given as an input. + """ + commit_hash = content_copy['gitCommitHash'] + repository_folder = path.split('/')[-1] + with cd(repository_folder): + return self.git_wrapper.git_checkout(commit_hash) + + + def add_content_copy_to_openbis(self, repository_folder): + with cd(repository_folder): + data_mgmt = dm.DataMgmt(openbis_config={}, git_config={'find_git': True}) + return data_mgmt.addref() diff --git a/src/python/OBis/obis/dm/commands/openbis_command.py b/src/python/OBis/obis/dm/commands/openbis_command.py new file mode 100644 index 0000000000000000000000000000000000000000..61635c686f50ca20b722091ebf4c64603f98f573 --- /dev/null +++ b/src/python/OBis/obis/dm/commands/openbis_command.py @@ -0,0 +1,107 @@ +import getpass +import hashlib +import os +import socket +import pybis +from ..command_result import CommandResult + + +class OpenbisCommand(object): + + def __init__(self, dm, openbis=None): + self.data_mgmt = dm + self.openbis = dm.openbis + self.git_wrapper = dm.git_wrapper + self.config_resolver = dm.config_resolver + self.config_dict = dm.config_resolver.config_dict() + + if self.openbis is None and dm.openbis_config.get('url') is not None: + self.openbis = pybis.Openbis(**dm.openbis_config) + + def external_dms_id(self): + return self.config_dict.get('external_dms_id') + + def repository_id(self): + return self.config_dict.get('repository_id') + + def data_set_type(self): + return self.config_dict.get('data_set_type') + + def data_set_id(self): + return self.config_dict.get('data_set_id') + + def data_set_properties(self): + return self.config_dict.get('data_set_properties') + + def sample_id(self): + return self.config_dict.get('sample_id') + + def experiment_id(self): + return self.config_dict.get('experiment_id') + + def user(self): + return self.config_dict.get('user') + + + def prepare_run(self): + result = self.check_configuration() + if result.failure(): + return result + result = self.login() + if result.failure(): + return result + return CommandResult(returncode=0, output="") + + + def check_configuration(self): + """ overwrite in subclass """ + return CommandResult(returncode=0, output="") + + + def login(self): + if self.openbis.is_session_active(): + return CommandResult(returncode=0, output="") + user = self.user() + passwd = getpass.getpass("Password for {}:".format(user)) + try: + self.openbis.login(user, passwd, save_token=True) + except ValueError: + msg = "Could not log into openbis {}".format(self.config_dict['openbis_url']) + return CommandResult(returncode=-1, output=msg) + return CommandResult(returncode=0, output='') + + def prepare_external_dms(self): + # If there is no external data management system, create one. + result = self.get_or_create_external_data_management_system() + if result.failure(): + return result + external_dms = result.output + self.config_resolver.set_value_for_parameter('external_dms_id', external_dms.code, 'local') + self.config_dict['external_dms_id'] = external_dms.code + return result + + def generate_external_data_management_system_code(self, user, hostname, edms_path): + path_hash = hashlib.sha1(edms_path.encode("utf-8")).hexdigest()[0:8] + return "{}-{}-{}".format(user, hostname, path_hash).upper() + + def get_or_create_external_data_management_system(self): + external_dms_id = self.external_dms_id() + user = self.user() + hostname = socket.gethostname() + result = self.git_wrapper.git_top_level_path() + if result.failure(): + return result + top_level_path = result.output + edms_path, path_name = os.path.split(result.output) + if external_dms_id is None: + external_dms_id = self.generate_external_data_management_system_code(user, hostname, edms_path) + try: + external_dms = self.openbis.get_external_data_management_system(external_dms_id.upper()) + except ValueError as e: + # external dms does not exist - create it + try: + external_dms = self.openbis.create_external_data_management_system(external_dms_id, external_dms_id, + "{}:/{}".format(hostname, edms_path)) + except ValueError as e: + return CommandResult(returncode=-1, output=str(e)) + return CommandResult(returncode=0, output=external_dms) diff --git a/src/python/OBis/obis/dm/commands/openbis_sync.py b/src/python/OBis/obis/dm/commands/openbis_sync.py new file mode 100644 index 0000000000000000000000000000000000000000..c1ed9cb90379e832a6f3f1a746f92c06e6764dc6 --- /dev/null +++ b/src/python/OBis/obis/dm/commands/openbis_sync.py @@ -0,0 +1,109 @@ +import pybis +from ..command_result import CommandResult +import uuid +import os +from ..git import GitRepoFileInfo +from .openbis_command import OpenbisCommand + + +class OpenbisSync(OpenbisCommand): + """A command object for synchronizing with openBIS.""" + + def check_configuration(self): + missing_config_settings = [] + if self.openbis is None: + missing_config_settings.append('openbis_url') + if self.user() is None: + missing_config_settings.append('user') + if self.data_set_type() is None: + missing_config_settings.append('data_set_type') + if self.sample_id() is None and self.experiment_id() is None: + missing_config_settings.append('sample_id') + missing_config_settings.append('experiment_id') + if len(missing_config_settings) > 0: + return CommandResult(returncode=-1, + output="Missing configuration settings for {}.".format(missing_config_settings)) + return CommandResult(returncode=0, output="") + + def check_data_set_status(self): + """If we are in sync with the data set on the server, there is nothing to do.""" + # TODO Get the DataSet from the server + # - Find the content copy that refers to this repo + # - Check if the commit id is the current commit id + # - If so, skip sync. + return CommandResult(returncode=0, output="") + + def create_data_set_code(self): + try: + data_set_code = self.openbis.create_permId() + return CommandResult(returncode=0, output=""), data_set_code + except ValueError as e: + return CommandResult(returncode=-1, output=str(e)), None + + def create_data_set(self, data_set_code, external_dms, repository_id): + data_set_type = self.data_set_type() + parent_data_set_id = self.data_set_id() + properties = self.data_set_properties() + result = self.git_wrapper.git_top_level_path() + if result.failure(): + return result + top_level_path = result.output + result = self.git_wrapper.git_commit_hash() + if result.failure(): + return result + commit_id = result.output + sample_id = self.sample_id() + experiment_id = self.experiment_id() + contents = GitRepoFileInfo(self.git_wrapper).contents() + try: + data_set = self.openbis.new_git_data_set(data_set_type, top_level_path, commit_id, repository_id, external_dms.code, + sample=sample_id, experiment=experiment_id, properties=properties, parents=parent_data_set_id, + data_set_code=data_set_code, contents=contents) + return CommandResult(returncode=0, output=""), data_set + except ValueError as e: + return CommandResult(returncode=-1, output=str(e)), None + + + def commit_metadata_updates(self, msg_fragment=None): + return self.data_mgmt.commit_metadata_updates(msg_fragment) + + + def prepare_repository_id(self): + repository_id = self.repository_id() + if self.repository_id() is None: + repository_id = str(uuid.uuid4()) + self.config_resolver.set_value_for_parameter('repository_id', repository_id, 'local') + return CommandResult(returncode=0, output=repository_id) + + + def run(self): + # TODO Write mementos in case openBIS is unreachable + # - write a file to the .git/obis folder containing the commit id. Filename includes a timestamp so they can be sorted. + + result = self.prepare_run() + if result.failure(): + return result + + result = self.prepare_repository_id() + if result.failure(): + return result + repository_id = result.output + + result = self.prepare_external_dms() + if result.failure(): + return result + external_dms = result.output + + result, data_set_code = self.create_data_set_code() + if result.failure(): + return result + + self.commit_metadata_updates() + + # Update data set id as last commit so we can easily revert it on failure + self.config_resolver.set_value_for_parameter('data_set_id', data_set_code, 'local') + self.commit_metadata_updates("data set id") + + # create a data set, using the existing data set as a parent, if there is one + result, data_set = self.create_data_set(data_set_code, external_dms, repository_id) + return result \ No newline at end of file diff --git a/src/python/OBis/obis/dm/data_mgmt.py b/src/python/OBis/obis/dm/data_mgmt.py index 5b61784766d4e25a5890583586c8add9d1296162..17f0c775329afec5bf820d6b23c664095db275c6 100644 --- a/src/python/OBis/obis/dm/data_mgmt.py +++ b/src/python/OBis/obis/dm/data_mgmt.py @@ -12,16 +12,18 @@ Copyright (c) 2017 Chandrasekhar Ramakrishnan. All rights reserved. import abc import os import shutil -import subprocess -from contextlib import contextmanager -from . import config as dm_config import traceback -import getpass -import socket -import uuid -import hashlib - import pybis +from . import config as dm_config +from .commands.addref import Addref +from .commands.clone import Clone +from .commands.openbis_sync import OpenbisSync +from .command_result import CommandResult +from .git import GitWrapper +from .utils import default_echo +from .utils import complete_git_config +from .utils import complete_openbis_config +from .utils import cd # noinspection PyPep8Naming @@ -45,89 +47,6 @@ def DataMgmt(echo_func=None, config_resolver=None, openbis_config={}, git_config return GitDataMgmt(config_resolver, openbis_config, git_wrapper, openbis) -def complete_openbis_config(config, resolver): - """Add default values for empty entries in the config.""" - config_dict = resolver.config_dict(local_only=True) - if config.get('url') is None: - config['url'] = config_dict['openbis_url'] - if config.get('verify_certificates') is None: - if config_dict.get('verify_certificates') is not None: - config['verify_certificates'] = config_dict['verify_certificates'] - else: - config['verify_certificates'] = True - if config.get('token') is None: - config['token'] = None - - -def complete_git_config(config): - """Add default values for empty entries in the config.""" - - find_git = config['find_git'] if config.get('find_git') is not None else True - if find_git: - git_cmd = locate_command('git') - if git_cmd.success(): - config['git_path'] = git_cmd.output - - git_annex_cmd = locate_command('git-annex') - if git_annex_cmd.success(): - config['git_annex_path'] = git_annex_cmd.output - - -def default_echo(details): - if details.get('level') != "DEBUG": - print(details['message']) - - -class CommandResult(object): - """Encapsulate result from a subprocess call.""" - - def __init__(self, completed_process=None, returncode=None, output=None): - """Convert a completed_process object into a ShellResult.""" - if completed_process: - self.returncode = completed_process.returncode - self.output = completed_process.stdout.decode('utf-8').strip() - else: - self.returncode = returncode - self.output = output - - def __str__(self): - return "CommandResult({},{})".format(self.returncode, self.output) - - def __repr__(self): - return "CommandResult({},{})".format(self.returncode, self.output) - - def success(self): - return self.returncode == 0 - - def failure(self): - return not self.success() - - -def run_shell(args, shell=False): - return CommandResult(subprocess.run(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=shell)) - - -def locate_command(command): - """Return a tuple of (returncode, stdout).""" - # Need to call this command in shell mode so we have the system PATH - result = run_shell(['type {}'.format(command)], shell=True) - # 'type -p' not supported by all shells, so we do it manually - if result.success(): - result.output = result.output.split(" ")[-1] - return result - - -@contextmanager -def cd(newdir): - """Safe cd -- return to original dir after execution, even if an exception is raised.""" - prevdir = os.getcwd() - os.chdir(os.path.expanduser(newdir)) - try: - yield - finally: - os.chdir(prevdir) - - class AbstractDataMgmt(metaclass=abc.ABCMeta): """Abstract object that implements operations. @@ -191,6 +110,22 @@ class AbstractDataMgmt(metaclass=abc.ABCMeta): """ return + @abc.abstractmethod + def clone(self, data_set_id, ssh_user, content_copy_index): + """Clone / copy a repository related to the given data set id. + :param data_set_id: + :param ssh_user: ssh user for remote clone (optional) + :param content_copy_index: index of content copy in case there are multiple copies (optional) + :return: A CommandResult. + """ + return + + def addref(self): + """Add the current folder as an obis repository to openBIS. + :return: A CommandResult. + """ + return + class NoGitDataMgmt(AbstractDataMgmt): """DataMgmt operations when git is not available -- show error messages.""" @@ -205,10 +140,16 @@ class NoGitDataMgmt(AbstractDataMgmt): self.error_raise("commit", "No git command found.") def sync(self): - self.error_raise("commit", "No git command found.") + self.error_raise("sync", "No git command found.") def status(self): - self.error_raise("commit", "No git command found.") + self.error_raise("status", "No git command found.") + + def clone(self, data_set_id, ssh_user, content_copy_index): + self.error_raise("clone", "No git command found.") + + def addref(self): + self.error_raise("addref", "No git command found.") class GitDataMgmt(AbstractDataMgmt): @@ -316,315 +257,18 @@ class GitDataMgmt(AbstractDataMgmt): folder = self.config_resolver.local_public_config_folder_path() self.git_wrapper.git_checkout(folder) - -class GitWrapper(object): - """A wrapper on commands to git.""" - - def __init__(self, git_path=None, git_annex_path=None, find_git=None): - self.git_path = git_path - self.git_annex_path = git_annex_path - - def can_run(self): - """Return true if the perquisites are satisfied to run""" - if self.git_path is None: - return False - if self.git_annex_path is None: - return False - if run_shell([self.git_path, 'help']).failure(): - # git help should have a returncode of 0 - return False - if run_shell([self.git_annex_path, 'help']).failure(): - # git help should have a returncode of 0 - return False - return True - - def git_init(self, path): - return run_shell([self.git_path, "init", path]) - - def git_status(self, path=None): - if path is None: - return run_shell([self.git_path, "status", "--porcelain"]) - else: - return run_shell([self.git_path, "status", "--porcelain", path]) - - def git_annex_init(self, path, desc): - cmd = [self.git_path, "-C", path, "annex", "init", "--version=6"] - if desc is not None: - cmd.append(desc) - result = run_shell(cmd) - if result.failure(): - return result - - cmd = [self.git_path, "-C", path, "config", "annex.thin", "true"] - result = run_shell(cmd) - if result.failure(): - return result - - attributes_src = os.path.join(os.path.dirname(__file__), "git-annex-attributes") - attributes_dst = os.path.join(path, ".gitattributes") - shutil.copyfile(attributes_src, attributes_dst) - cmd = [self.git_path, "-C", path, "add", ".gitattributes"] - result = run_shell(cmd) - if result.failure(): - return result - - cmd = [self.git_path, "-C", path, "commit", "-m", "Initial commit."] - result = run_shell(cmd) - return result - - def git_add(self, path): - return run_shell([self.git_path, "add", path]) - - def git_commit(self, msg): - return run_shell([self.git_path, "commit", '-m', msg]) - - def git_top_level_path(self): - return run_shell([self.git_path, 'rev-parse', '--show-toplevel']) - - def git_commit_hash(self): - return run_shell([self.git_path, 'rev-parse', '--short', 'HEAD']) - - def git_ls_tree(self): - return run_shell([self.git_path, 'ls-tree', '--full-tree', '-r', 'HEAD']) - - def git_checkout(self, path): - return run_shell([self.git_path, "checkout", path]) - - def git_reset_to(self, commit_hash): - return run_shell([self.git_path, 'reset', commit_hash]) - - -class OpenbisSync(object): - """A command object for synchronizing with openBIS.""" - - def __init__(self, dm, openbis=None): - self.data_mgmt = dm - self.git_wrapper = dm.git_wrapper - self.config_resolver = dm.config_resolver - self.config_dict = dm.config_resolver.config_dict() - self.openbis = dm.openbis - - if self.openbis is None and dm.openbis_config.get('url') is not None: - self.openbis = pybis.Openbis(**dm.openbis_config) - - def user(self): - return self.config_dict.get('user') - - def external_dms_id(self): - return self.config_dict.get('external_dms_id') - - def repository_id(self): - return self.config_dict.get('repository_id') - - def data_set_type(self): - return self.config_dict.get('data_set_type') - - def data_set_id(self): - return self.config_dict.get('data_set_id') - - def data_set_properties(self): - return self.config_dict.get('data_set_properties') - - def sample_id(self): - return self.config_dict.get('sample_id') - - def experiment_id(self): - return self.config_dict.get('experiment_id') - - def check_configuration(self): - missing_config_settings = [] - if self.openbis is None: - missing_config_settings.append('openbis_url') - if self.user() is None: - missing_config_settings.append('user') - if self.data_set_type() is None: - missing_config_settings.append('data_set_type') - if self.sample_id() is None and self.experiment_id() is None: - missing_config_settings.append('sample_id') - missing_config_settings.append('experiment_id') - if len(missing_config_settings) > 0: - return CommandResult(returncode=-1, - output="Missing configuration settings for {}.".format(missing_config_settings)) - return CommandResult(returncode=0, output="") - - def check_data_set_status(self): - """If we are in sync with the data set on the server, there is nothing to do.""" - # TODO Get the DataSet from the server - # - Find the content copy that refers to this repo - # - Check if the commit id is the current commit id - # - If so, skip sync. - return CommandResult(returncode=0, output="") - - def login(self): - if self.openbis.is_session_active(): - return CommandResult(returncode=0, output="") - user = self.user() - passwd = getpass.getpass("Password for {}:".format(user)) - try: - self.openbis.login(user, passwd, save_token=True) - except ValueError: - msg = "Could not log into openbis {}".format(self.config_dict['openbis_url']) - return CommandResult(returncode=-1, output=msg) - return CommandResult(returncode=0, output='') - - def generate_external_data_management_system_code(self, user, hostname, edms_path): - path_hash = hashlib.sha1(edms_path.encode("utf-8")).hexdigest()[0:8] - return "{}-{}-{}".format(user, hostname, path_hash).upper() - - def get_or_create_external_data_management_system(self): - external_dms_id = self.external_dms_id() - user = self.user() - hostname = socket.gethostname() - result = self.git_wrapper.git_top_level_path() - if result.failure(): - return result - top_level_path = result.output - edms_path, path_name = os.path.split(result.output) - if external_dms_id is None: - external_dms_id = self.generate_external_data_management_system_code(user, hostname, edms_path) - try: - external_dms = self.openbis.get_external_data_management_system(external_dms_id.upper()) - except ValueError as e: - # external dms does not exist - create it - try: - external_dms = self.openbis.create_external_data_management_system(external_dms_id, external_dms_id, - "{}:/{}".format(hostname, edms_path)) - except ValueError as e: - return CommandResult(returncode=-1, output=str(e)) - return CommandResult(returncode=0, output=external_dms) - - def create_data_set_code(self): + def clone(self, data_set_id, ssh_user, content_copy_index): try: - data_set_code = self.openbis.create_permId() - return CommandResult(returncode=0, output=""), data_set_code - except ValueError as e: - return CommandResult(returncode=-1, output=str(e)), None - - def create_data_set(self, data_set_code, external_dms, repository_id): - data_set_type = self.data_set_type() - parent_data_set_id = self.data_set_id() - properties = self.data_set_properties() - result = self.git_wrapper.git_top_level_path() - if result.failure(): - return result - top_level_path = result.output - result = self.git_wrapper.git_commit_hash() - if result.failure(): - return result - commit_id = result.output - sample_id = self.sample_id() - experiment_id = self.experiment_id() - contents = GitRepoFileInfo(self.git_wrapper).contents() - try: - data_set = self.openbis.new_git_data_set(data_set_type, top_level_path, commit_id, repository_id, external_dms.code, - sample=sample_id, experiment=experiment_id, properties=properties, parents=parent_data_set_id, - data_set_code=data_set_code, contents=contents) - return CommandResult(returncode=0, output=""), data_set - except ValueError as e: - return CommandResult(returncode=-1, output=str(e)), None - - def commit_metadata_updates(self, msg_fragment=None): - return self.data_mgmt.commit_metadata_updates(msg_fragment) - - def prepare_run(self): - result = self.check_configuration() - if result.failure(): - return result - result = self.login() - if result.failure(): - return result - return CommandResult(returncode=0, output="") - - def prepare_repository_id(self): - repository_id = self.repository_id() - if self.repository_id() is None: - repository_id = str(uuid.uuid4()) - self.config_resolver.set_value_for_parameter('repository_id', repository_id, 'local') - return CommandResult(returncode=0, output=repository_id) - - - def prepare_external_dms(self): - # If there is no external data management system, create one. - result = self.get_or_create_external_data_management_system() - if result.failure(): - return result - external_dms = result.output - self.config_resolver.set_value_for_parameter('external_dms_id', external_dms.code, 'local') - return result - - def run(self): - # TODO Write mementos in case openBIS is unreachable - # - write a file to the .git/obis folder containing the commit id. Filename includes a timestamp so they can be sorted. - - result = self.prepare_run() - if result.failure(): - return result - - result = self.prepare_repository_id() - if result.failure(): - return result - repository_id = result.output - - result = self.prepare_external_dms() - if result.failure(): - return result - external_dms = result.output - - result, data_set_code = self.create_data_set_code() - if result.failure(): - return result - - self.commit_metadata_updates() - - # Update data set id as last commit so we can easily revert it on failure - self.config_resolver.set_value_for_parameter('data_set_id', data_set_code, 'local') - self.commit_metadata_updates("data set id") - - # create a data set, using the existing data set as a parent, if there is one - result, data_set = self.create_data_set(data_set_code, external_dms, repository_id) - return result - - -class GitRepoFileInfo(object): - """Class that gathers checksums and file lengths for all files in the repo.""" - - def __init__(self, git_wrapper): - self.git_wrapper = git_wrapper + cmd = Clone(self, data_set_id, ssh_user, content_copy_index) + return cmd.run() + except Exception: + traceback.print_exc() + return CommandResult(returncode=-1, output="Could not clone repository.") - def contents(self): - """Return a list of dicts describing the contents of the repo. - :return: A list of dictionaries - {'crc32': checksum, - 'fileLength': size of the file, - 'path': path relative to repo root. - 'directory': False - }""" - files = self.file_list() - cksum = self.cksum(files) - return cksum - - def file_list(self): - tree = self.git_wrapper.git_ls_tree() - if tree.failure(): - return [] - lines = tree.output.split("\n") - files = [line.split("\t")[-1].strip() for line in lines] - return files - - def cksum(self, files): - cmd = ['cksum'] - cmd.extend(files) - result = run_shell(cmd) - if result.failure(): - return [] - lines = result.output.split("\n") - return [self.checksum_line_to_dict(line) for line in lines] - - @staticmethod - def checksum_line_to_dict(line): - fields = line.split(" ") - return { - 'crc32': int(fields[0]), - 'fileLength': int(fields[1]), - 'path': fields[2] - } + def addref(self): + try: + cmd = Addref(self) + return cmd.run() + except Exception: + traceback.print_exc() + return CommandResult(returncode=-1, output="Could not add reference.") diff --git a/src/python/OBis/obis/dm/git.py b/src/python/OBis/obis/dm/git.py new file mode 100644 index 0000000000000000000000000000000000000000..746a86d947a0e6dfaa61ec76a02534ffdde2c183 --- /dev/null +++ b/src/python/OBis/obis/dm/git.py @@ -0,0 +1,125 @@ +import shutil +import os +from .utils import run_shell + + +class GitWrapper(object): + """A wrapper on commands to git.""" + + def __init__(self, git_path=None, git_annex_path=None, find_git=None): + self.git_path = git_path + self.git_annex_path = git_annex_path + + def can_run(self): + """Return true if the perquisites are satisfied to run""" + if self.git_path is None: + return False + if self.git_annex_path is None: + return False + if run_shell([self.git_path, 'help']).failure(): + # git help should have a returncode of 0 + return False + if run_shell([self.git_annex_path, 'help']).failure(): + # git help should have a returncode of 0 + return False + return True + + def git_init(self, path): + return run_shell([self.git_path, "init", path]) + + def git_status(self, path=None): + if path is None: + return run_shell([self.git_path, "status", "--porcelain"]) + else: + return run_shell([self.git_path, "status", "--porcelain", path]) + + def git_annex_init(self, path, desc): + cmd = [self.git_path, "-C", path, "annex", "init", "--version=6"] + if desc is not None: + cmd.append(desc) + result = run_shell(cmd) + if result.failure(): + return result + + cmd = [self.git_path, "-C", path, "config", "annex.thin", "true"] + result = run_shell(cmd) + if result.failure(): + return result + + attributes_src = os.path.join(os.path.dirname(__file__), "git-annex-attributes") + attributes_dst = os.path.join(path, ".gitattributes") + shutil.copyfile(attributes_src, attributes_dst) + cmd = [self.git_path, "-C", path, "add", ".gitattributes"] + result = run_shell(cmd) + if result.failure(): + return result + + cmd = [self.git_path, "-C", path, "commit", "-m", "Initial commit."] + result = run_shell(cmd) + return result + + def git_add(self, path): + return run_shell([self.git_path, "add", path]) + + def git_commit(self, msg): + return run_shell([self.git_path, "commit", '-m', msg]) + + def git_top_level_path(self): + return run_shell([self.git_path, 'rev-parse', '--show-toplevel']) + + def git_commit_hash(self): + return run_shell([self.git_path, 'rev-parse', '--short', 'HEAD']) + + def git_ls_tree(self): + return run_shell([self.git_path, 'ls-tree', '--full-tree', '-r', 'HEAD']) + + def git_checkout(self, path): + return run_shell([self.git_path, "checkout", path]) + + def git_reset_to(self, commit_hash): + return run_shell([self.git_path, 'reset', commit_hash]) + + +class GitRepoFileInfo(object): + """Class that gathers checksums and file lengths for all files in the repo.""" + + def __init__(self, git_wrapper): + self.git_wrapper = git_wrapper + + def contents(self): + """Return a list of dicts describing the contents of the repo. + :return: A list of dictionaries + {'crc32': checksum, + 'fileLength': size of the file, + 'path': path relative to repo root. + 'directory': False + }""" + files = self.file_list() + cksum = self.cksum(files) + return cksum + + def file_list(self): + tree = self.git_wrapper.git_ls_tree() + if tree.failure(): + return [] + lines = tree.output.split("\n") + files = [line.split("\t")[-1].strip() for line in lines] + return files + + def cksum(self, files): + cmd = ['cksum'] + cmd.extend(files) + result = run_shell(cmd) + if result.failure(): + return [] + lines = result.output.split("\n") + return [self.checksum_line_to_dict(line) for line in lines] + + @staticmethod + def checksum_line_to_dict(line): + fields = line.split(" ") + return { + 'crc32': int(fields[0]), + 'fileLength': int(fields[1]), + 'path': fields[2] + } \ No newline at end of file diff --git a/src/python/OBis/obis/dm/utils.py b/src/python/OBis/obis/dm/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..0a4a52e0cde621a2591ef51ecd8366453ad9d479 --- /dev/null +++ b/src/python/OBis/obis/dm/utils.py @@ -0,0 +1,62 @@ +import subprocess +import os +from contextlib import contextmanager +from .command_result import CommandResult + + +def complete_openbis_config(config, resolver, local_only=True): + """Add default values for empty entries in the config.""" + config_dict = resolver.config_dict(local_only) + if config.get('url') is None: + config['url'] = config_dict['openbis_url'] + if config.get('verify_certificates') is None: + if config_dict.get('verify_certificates') is not None: + config['verify_certificates'] = config_dict['verify_certificates'] + else: + config['verify_certificates'] = True + if config.get('token') is None: + config['token'] = None + + +def complete_git_config(config): + """Add default values for empty entries in the config.""" + + find_git = config['find_git'] if config.get('find_git') is not None else True + if find_git: + git_cmd = locate_command('git') + if git_cmd.success(): + config['git_path'] = git_cmd.output + + git_annex_cmd = locate_command('git-annex') + if git_annex_cmd.success(): + config['git_annex_path'] = git_annex_cmd.output + + +def default_echo(details): + if details.get('level') != "DEBUG": + print(details['message']) + + +def run_shell(args, shell=False): + return CommandResult(subprocess.run(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=shell)) + + +def locate_command(command): + """Return a tuple of (returncode, stdout).""" + # Need to call this command in shell mode so we have the system PATH + result = run_shell(['type {}'.format(command)], shell=True) + # 'type -p' not supported by all shells, so we do it manually + if result.success(): + result.output = result.output.split(" ")[-1] + return result + + +@contextmanager +def cd(newdir): + """Safe cd -- return to original dir after execution, even if an exception is raised.""" + prevdir = os.getcwd() + os.chdir(os.path.expanduser(newdir)) + try: + yield + finally: + os.chdir(prevdir) diff --git a/src/python/OBis/obis/scripts/cli.py b/src/python/OBis/obis/scripts/cli.py index 82a454c2c1a16593d30255d9fd6fb742db9b59e6..817f6e200d3a2da744e4ffafdee0e3b8ad5aeaa7 100644 --- a/src/python/OBis/obis/scripts/cli.py +++ b/src/python/OBis/obis/scripts/cli.py @@ -15,7 +15,9 @@ from datetime import datetime import click -from .. import dm, CommandResult +from .. import dm +from ..dm.command_result import CommandResult +from ..dm.utils import cd def click_echo(message): @@ -59,20 +61,25 @@ def cli(ctx, quiet, skip_verification): @cli.command() @click.pass_context -@click.argument('other', type=click.Path(exists=True)) -def addref(ctx, other): +@click.argument('repository', type=click.Path(exists=True)) +def addref(ctx, repository): """Add a reference to the other repository in this repository. """ - click_echo("addref {}".format(other)) + with cd(repository): + data_mgmt = shared_data_mgmt(ctx.obj) + return check_result("addref", data_mgmt.addref()) @cli.command() @click.pass_context -@click.argument('url') -def clone(ctx, url): - """Clone the repository found at url. +@click.option('-u', '--ssh_user', default=None, help='User to connect to remote systems via ssh') +@click.option('-c', '--content_copy_index', type=int, default=None, help='Index of the content copy to clone from in case there are multiple copies') +@click.argument('data_set_id') +def clone(ctx, ssh_user, content_copy_index, data_set_id): + """Clone the repository found in the given data set id. """ - click_echo("clone {}".format(url)) + data_mgmt = shared_data_mgmt(ctx.obj) + return check_result("clone", data_mgmt.clone(data_set_id, ssh_user, content_copy_index)) @cli.command()