Skip to content
Snippets Groups Projects
Commit 6d63085a authored by Chandrasekhar Ramakrishnan
Browse files

SSDM-4670: Provide file metadata to openbis.

parent 8e987860
No related branches found
No related tags found
No related merge requests found
...@@ -342,6 +342,9 @@ class GitWrapper(object): ...@@ -342,6 +342,9 @@ class GitWrapper(object):
def git_commit_id(self): def git_commit_id(self):
return run_shell([self.git_path, 'rev-parse', '--short', 'HEAD']) return run_shell([self.git_path, 'rev-parse', '--short', 'HEAD'])
def git_ls_tree(self):
    """Run ``git ls-tree --full-tree -r HEAD`` and return what ``run_shell`` returns."""
    command = [self.git_path, 'ls-tree', '--full-tree', '-r', 'HEAD']
    return run_shell(command)
class OpenbisSync(object): class OpenbisSync(object):
"""A command object for synchronizing with openBIS.""" """A command object for synchronizing with openBIS."""
...@@ -443,10 +446,11 @@ class OpenbisSync(object): ...@@ -443,10 +446,11 @@ class OpenbisSync(object):
return result return result
commit_id = result.output commit_id = result.output
object_id = self.object_id() object_id = self.object_id()
contents = GitRepoFileInfo(self.git_wrapper).contents()
try: try:
data_set = self.openbis.new_git_data_set(data_set_type, top_level_path, commit_id, external_dms.code, data_set = self.openbis.new_git_data_set(data_set_type, top_level_path, commit_id, external_dms.code,
object_id, data_set_code=data_set_code, parents=parent_data_set_id, object_id, data_set_code=data_set_code, parents=parent_data_set_id,
properties=properties) properties=properties, contents=contents)
return CommandResult(returncode=0, output=""), data_set return CommandResult(returncode=0, output=""), data_set
except ValueError as e: except ValueError as e:
return CommandResult(returncode=-1, output=str(e)), None return CommandResult(returncode=-1, output=str(e)), None
...@@ -495,7 +499,7 @@ class OpenbisSync(object): ...@@ -495,7 +499,7 @@ class OpenbisSync(object):
self.commit_metadata_updates() self.commit_metadata_updates()
# create a data set, using the existing data set as a parent, if there is one # create a data set, using the existing data set as a parent, if there is one
result, data_set = self.create_data_set(None, external_dms) result, data_set = self.create_data_set("DUMMY", external_dms)
if result.failure(): if result.failure():
return result return result
...@@ -538,3 +542,48 @@ class OpenbisSync(object): ...@@ -538,3 +542,48 @@ class OpenbisSync(object):
def run(self): def run(self):
return self.run_workaround() return self.run_workaround()
class GitRepoFileInfo(object):
    """Class that gathers checksums and file lengths for all files in the repo.

    File names are taken from ``git_wrapper.git_ls_tree()``; checksums and
    lengths are taken from the output of the ``cksum`` command.
    """

    def __init__(self, git_wrapper):
        """Remember the wrapper used to list the repository's files.

        :param git_wrapper: object providing ``git_ls_tree()``, whose result
            exposes ``failure()`` and ``output``.
        """
        self.git_wrapper = git_wrapper

    def contents(self):
        """Return a list of dicts describing the contents of the repo.

        :return: A list of dictionaries
          {'crc32': checksum (int),
           'fileLength': size of the file (int),
           'path': path as listed by git ls-tree
          }
          The list is empty when listing the tree or running cksum fails,
          or when there are no files.
        """
        return self.cksum(self.file_list())

    def file_list(self):
        """Return the paths printed by ``git ls-tree``, one per entry.

        :return: list of path strings; empty if the git command failed.
        """
        tree = self.git_wrapper.git_ls_tree()
        if tree.failure():
            return []
        # Each ls-tree line is "<mode> <type> <object>\t<path>". Split on the
        # first tab only so everything after it is kept as the path.
        paths = (line.split("\t", 1)[-1].strip() for line in tree.output.split("\n"))
        # Blank lines (empty output, trailing newline) are not files.
        return [path for path in paths if path]

    def cksum(self, files):
        """Run ``cksum`` on *files* and parse its output.

        :param files: iterable of paths to checksum.
        :return: list of dicts as produced by ``checksum_line_to_dict``;
            empty if there are no files or the command failed.
        """
        cmd = ['cksum']
        cmd.extend(files)
        if len(cmd) == 1:
            # Without operands cksum would read standard input and could block.
            return []
        result = run_shell(cmd)
        if result.failure():
            return []
        lines = result.output.split("\n")
        # Skip blank lines so a trailing newline does not break int() parsing.
        return [self.checksum_line_to_dict(line) for line in lines if line.strip()]

    @staticmethod
    def checksum_line_to_dict(line):
        """Convert one ``cksum`` output line ("<crc> <length> <path>") to a dict.

        :param line: a single line of cksum output.
        :return: dict with keys 'crc32', 'fileLength' and 'path'.
        """
        # Limit the split to the first two spaces: the remainder is the path,
        # which may itself contain spaces.
        fields = line.split(" ", 2)
        return {
            'crc32': int(fields[0]),
            'fileLength': int(fields[1]),
            'path': fields[2]
        }
...@@ -161,10 +161,12 @@ def test_child_data_set(tmpdir): ...@@ -161,10 +161,12 @@ def test_child_data_set(tmpdir):
child_ds_code = dm.config_resolver.config_dict()['data_set_id'] child_ds_code = dm.config_resolver.config_dict()['data_set_id']
assert parent_ds_code != child_ds_code assert parent_ds_code != child_ds_code
commit_id = dm.git_wrapper.git_commit_id().output commit_id = dm.git_wrapper.git_commit_id().output
contents = data_mgmt.GitRepoFileInfo(dm.git_wrapper).contents()
# TODO Fix when the workaround is no longer needed. # TODO Fix when the workaround is no longer needed.
# (see OpenbisSync.run_correct) # (see OpenbisSync.run_correct)
# check_new_data_set_expectations(dm, tmp_dir_path, commit_id, ANY, child_ds_code, parent_ds_code, properties) # check_new_data_set_expectations(dm, tmp_dir_path, commit_id, ANY, child_ds_code, parent_ds_code, properties, contents)
check_new_data_set_expectations(dm, tmp_dir_path, commit_id, ANY, None, parent_ds_code, properties) check_new_data_set_expectations(dm, tmp_dir_path, commit_id, ANY, "DUMMY", parent_ds_code, properties, contents)
# TODO Test that if the data set registration fails, the data_set_id is reverted # TODO Test that if the data set registration fails, the data_set_id is reverted
...@@ -198,9 +200,11 @@ def prepare_new_data_set_expectations(dm, properties={}): ...@@ -198,9 +200,11 @@ def prepare_new_data_set_expectations(dm, properties={}):
dm.openbis.new_git_data_set = MagicMock(return_value=data_set) dm.openbis.new_git_data_set = MagicMock(return_value=data_set)
def check_new_data_set_expectations(dm, tmp_dir_path, commit_id, external_dms, data_set_id, parent_id, properties): def check_new_data_set_expectations(dm, tmp_dir_path, commit_id, external_dms, data_set_id, parent_id, properties,
contents):
dm.openbis.new_git_data_set.assert_called_with('DS_TYPE', tmp_dir_path, commit_id, external_dms, "/SAMPLE/ID", dm.openbis.new_git_data_set.assert_called_with('DS_TYPE', tmp_dir_path, commit_id, external_dms, "/SAMPLE/ID",
data_set_code=data_set_id, parents=parent_id, properties=properties) data_set_code=data_set_id, parents=parent_id, properties=properties,
contents=contents)
def copy_test_data(tmpdir): def copy_test_data(tmpdir):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment