Newer
Older
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
parse_jackson(objects)
for object in objects:
object['permId'] = object['permId']['permId']
if object.get('entityType') is not None:
object['entityType'] = object['entityType']['code']
elif object.get('propertyType') is not None:
object['propertyType'] = object['propertyType']['code']
elif object.get('propertyAssignment') is not None:
object['entityType'] = object['propertyAssignment']['entityType']['code']
object['propertyType'] = object['propertyAssignment']['propertyType']['code']
object['creationDate'] = format_timestamp(object['creationDate'])
return objects
else:
raise ValueError("No semantic annotations found!")
def get_semantic_annotations(self):
""" Get a list of all available semantic annotations (DataFrame object).
"""
objects = self._search_semantic_annotations({})
attrs = ['permId', 'entityType', 'propertyType', 'predicateOntologyId', 'predicateOntologyVersion', 'predicateAccessionId', 'descriptorOntologyId', 'descriptorOntologyVersion', 'descriptorAccessionId', 'creationDate']
annotations = DataFrame(objects)
return Things(self, 'semantic_annotation', annotations[attrs], 'permId')
def get_semantic_annotation(self, permId, only_data = False):
criteria = {
"@type" : "as.dto.semanticannotation.search.SemanticAnnotationSearchCriteria",
"criteria" : [{
"@type" : "as.dto.common.search.PermIdSearchCriteria",
"fieldValue" : {
"@type" : "as.dto.common.search.StringEqualToValue",
"value" : permId
}
}]
}
objects = self._search_semantic_annotations(criteria)
object = objects[0]
if only_data:
return object
else:
return SemanticAnnotation(self, isNew=False, **object)
Swen Vermeul
committed
def get_sample_types(self, type=None):
""" Returns a list of all available sample types
"""
return self._get_types_of(
"searchSampleTypes",
"Sample",
type,
Swen Vermeul
committed
["generatedCodePrefix"]
)
get_object_types = get_sample_types # Alias
Swen Vermeul
committed
def get_sample_type(self, type):
try:
return self._get_types_of(
"searchSampleTypes",
Swen Vermeul
committed
"Sample",
type,
["generatedCodePrefix"]
)
except Exception:
raise ValueError("no such sample type: {}".format(type))
get_object_type = get_sample_type # Alias
Swen Vermeul
committed
def get_experiment_types(self, type=None):
""" Returns a list of all available experiment types
"""
return self._get_types_of(
"searchExperimentTypes",
"Experiment",
Swen Vermeul
committed
type
)
def get_experiment_type(self, type):
Swen Vermeul
committed
return self._get_types_of(
"searchExperimentTypes",
"Experiment",
Swen Vermeul
committed
type
)
except Exception:
raise ValueError("No such experiment type: {}".format(type))
def get_material_types(self, type=None):
""" Returns a list of all available material types
"""
return self._get_types_of("searchMaterialTypes", "Material", type)
def get_material_type(self, type):
try:
return self._get_types_of("searchMaterialTypes", "Material", type)
except Exception:
raise ValueError("No such material type: {}".format(type))
def get_dataset_types(self, type=None):
""" Returns a list (DataFrame object) of all currently available dataset types
"""
Yves Noirjean
committed
return self._get_types_of("searchDataSetTypes", "DataSet", type, optional_attributes=['kind'])
Swen Vermeul
committed
def get_dataset_type(self, type):
try:
Yves Noirjean
committed
return self._get_types_of("searchDataSetTypes", "DataSet", type, optional_attributes=['kind'])
Swen Vermeul
committed
except Exception:
raise ValueError("No such dataSet type: {}".format(type))
Yves Noirjean
committed
def _get_types_of(self, method_name, entity, type_name=None, additional_attributes=[], optional_attributes=[]):
""" Returns a list of all available types of an entity.
If the name of the entity-type is given, it returns a PropertyAssignments object
search_request = {}
fetch_options = {}
if type_name is not None:
search_request = _gen_search_criteria({
"operator": "AND",
})
fetch_options = {
Swen Vermeul
committed
"@type": "as.dto.{}.fetchoptions.{}TypeFetchOptions".format(
Swen Vermeul
committed
)
}
Swen Vermeul
committed
fetch_options['propertyAssignments'] = fetch_option['propertyAssignments']
"params": [self.token, search_request, fetch_options],
}
resp = self._post_request(self.as_v3, request)
parse_jackson(resp)
if type_name is not None and len(resp['objects']) == 1:
return PropertyAssignments(self, resp['objects'][0])
if len(resp['objects']) >= 1:
types = DataFrame(resp['objects'])
Swen Vermeul
committed
types['modificationDate'] = types['modificationDate'].map(format_timestamp)
Yves Noirjean
committed
attributes = self._get_attributes(type_name, types, additional_attributes, optional_attributes)
return Things(self, entity.lower() + '_type', types[attributes])
Yves Noirjean
committed
def _get_attributes(self, type_name, types, additional_attributes, optional_attributes):
attributes = ['code', 'description'] + additional_attributes
attributes += [attribute for attribute in optional_attributes if attribute in types]
attributes += ['modificationDate']
if type_name is not None:
attributes += ['propertyAssignments']
return attributes
Swen Vermeul
committed
def is_session_active(self):
""" checks whether a session is still active. Returns true or false.
"""
Swen Vermeul
committed
return self.is_token_valid(self.token)
def is_token_valid(self, token=None):
Chandrasekhar Ramakrishnan
committed
"""Check if the connection to openBIS is valid.
This method is useful to check if a token is still valid or if it has timed out,
requiring the user to login again.
Chandrasekhar Ramakrishnan
committed
:return: Return True if the token is valid, False if it is not valid.
"""
if token is None:
token = self.token
if token is None:
return False
request = {
"method": "isSessionActive",
"params": [token],
resp = self._post_request(self.as_v1, request)
def get_dataset(self, permid, only_data=False):
"""fetch a dataset and some metadata attached to it:
- properties
- sample
- parents
- children
- containers
- dataStore
- physicalData
- linkedData
:return: a DataSet object
"""
criteria = [{
"permId": permid,
"@type": "as.dto.dataset.id.DataSetPermId"
}]
fetchopts = {
"parents": {"@type": "as.dto.dataset.fetchoptions.DataSetFetchOptions"},
"children": {"@type": "as.dto.dataset.fetchoptions.DataSetFetchOptions"},
"containers": {"@type": "as.dto.dataset.fetchoptions.DataSetFetchOptions"},
"type": {"@type": "as.dto.dataset.fetchoptions.DataSetTypeFetchOptions"},
}
for option in ['tags', 'properties', 'dataStore', 'physicalData', 'linkedData',
'experiment', 'sample']:
fetchopts[option] = fetch_option[option]
request = {
"params": [
self.token,
criteria,
fetchopts,
],
resp = self._post_request(self.as_v3, request)
raise ValueError('no such dataset found: ' + permid)
parse_jackson(resp)
for permid in resp:
if only_data:
return resp[permid]
else:
return DataSet(
self,
type=self.get_dataset_type(resp[permid]["type"]["code"]),
data=resp[permid]
)
Swen Vermeul
committed
def get_sample(self, sample_ident, only_data=False, withAttachments=False):
Chandrasekhar Ramakrishnan
committed
"""Retrieve metadata for the sample.
Get metadata for the sample and any directly connected parents of the sample to allow access
to the same information visible in the ELN UI. The metadata will be on the file system.
:param sample_identifiers: A list of sample identifiers to retrieve.
"""
search_request = search_request_for_identifier(sample_ident, 'sample')
Swen Vermeul
committed
fetchopts = {"type": {"@type": "as.dto.sample.fetchoptions.SampleTypeFetchOptions"}}
Swen Vermeul
committed
for option in ['tags', 'properties', 'attachments', 'space', 'experiment', 'registrator', 'dataSets']:
fetchopts[option] = fetch_option[option]
if withAttachments:
fetchopts['attachments'] = fetch_option['attachmentsWithContent']
for key in ['parents','children','container','components']:
fetchopts[key] = {"@type": "as.dto.sample.fetchoptions.SampleFetchOptions"}
sample_request = {
"method": "getSamples",
"params": [
self.token,
[search_request],
Swen Vermeul
committed
fetchopts
resp = self._post_request(self.as_v3, sample_request)
parse_jackson(resp)
raise ValueError('no such sample found: ' + sample_ident)
for sample_ident in resp:
if only_data:
return resp[sample_ident]
else:
return Sample(self, self.get_sample_type(resp[sample_ident]["type"]["code"]), resp[sample_ident])
get_object = get_sample # Alias
def get_external_data_management_system(self, permId, only_data=False):
Chandrasekhar Ramakrishnan
committed
"""Retrieve metadata for the external data management system.
:param permId: A permId for an external DMS.
Chandrasekhar Ramakrishnan
committed
:param only_data: Return the result data as a hash-map, not an object.
"""
request = {
"method": "getExternalDataManagementSystems",
"params": [
self.token,
[{
"@type": "as.dto.externaldms.id.ExternalDmsPermId",
"permId": permId
Chandrasekhar Ramakrishnan
committed
}],
{},
],
}
resp = self._post_request(self.as_v3, request)
parse_jackson(resp)
if resp is None or len(resp) == 0:
raise ValueError('no such external DMS found: ' + permId)
Chandrasekhar Ramakrishnan
committed
else:
for ident in resp:
if only_data:
return resp[ident]
else:
return ExternalDMS(self, resp[ident])
def new_space(self, **kwargs):
return Space(self, None, **kwargs)
Swen Vermeul
committed
def new_analysis(self, name, description=None, sample=None, dss_code=None, result_files=None,
notebook_files=None, parents=None):
Swen Vermeul
committed
""" An analysis contains the Jupyter notebook file(s) and some result files.
Technically this method involves uploading files to the session workspace
and activating the dropbox aka dataset ingestion service "jupyter-uploader-api"
Swen Vermeul
committed
"""
if dss_code is None:
dss_code = self.get_datastores()['code'][0]
# if a sample identifier was given, use it as a string.
# if a sample object was given, take its identifier
sampleId = self.sample_to_sample_id(sample)
Swen Vermeul
committed
parentIds = []
if parents is not None:
if not isinstance(parents, list):
parants = [parents]
for parent in parents:
parentIds.append(parent.permId)
Swen Vermeul
committed
folder = time.strftime('%Y-%m-%d_%H-%M-%S')
Swen Vermeul
committed
data_sets = []
if notebook_files is not None:
notebooks_folder = os.path.join(folder, 'notebook_files')
self.upload_files(
datastore_url=datastore_url,
Swen Vermeul
committed
files=notebook_files,
folder=notebooks_folder,
Swen Vermeul
committed
wait_until_finished=True
)
data_sets.append({
"dataSetType": "JUPYTER_NOTEBOOk",
Swen Vermeul
committed
"sessionWorkspaceFolder": notebooks_folder,
"fileNames": notebook_files,
"properties": {}
Swen Vermeul
committed
})
if result_files is not None:
results_folder = os.path.join(folder, 'result_files')
self.upload_files(
datastore_url=datastore_url,
Swen Vermeul
committed
files=result_files,
folder=results_folder,
wait_until_finished=True
)
data_sets.append({
"dataSetType": "JUPYTER_RESULT",
"sessionWorkspaceFolder": results_folder,
"fileNames": result_files,
"properties": {}
Swen Vermeul
committed
})
"method": "createReportFromAggregationService",
"params": [
self.token,
dss_code,
{
"sampleId": sampleId,
"parentIds": parentIds,
"containers": [{
"dataSetType": "JUPYTER_CONTAINER",
"properties": {
"NAME": name,
"DESCRIPTION": description
}
}],
"dataSets": data_sets,
}
],
resp = self._post_request(self.reg_v1, request)
try:
if resp['rows'][0][0]['value'] == 'OK':
return resp['rows'][0][1]['value']
except:
return resp
Swen Vermeul
committed
def new_git_data_set(self, data_set_type, path, commit_id, repository_id, dms, sample=None, experiment=None, properties={},
dss_code=None, parents=None, data_set_code=None, contents=[]):
""" Create a link data set.
:param data_set_type: The type of the data set
Chandrasekhar Ramakrishnan
committed
:param data_set_type: The type of the data set
:param path: The path to the git repository
:param commit_id: The git commit id
:param repository_id: The git repository id - same for copies
:param dms: An external data managment system object or external_dms_id
:param sample: A sample object or sample id.
Chandrasekhar Ramakrishnan
committed
:param dss_code: Code for the DSS -- defaults to the first dss if none is supplied.
:param properties: Properties for the data set.
:param parents: Parents for the data set.
Chandrasekhar Ramakrishnan
committed
:param data_set_code: A data set code -- used if provided, otherwise generated on the server
:param contents: A list of dicts that describe the contents:
{'file_length': [file length],
'crc32': [crc32 checksum],
'directory': [is path a directory?]
'path': [the relative path string]}
:return: A DataSet object
"""
return pbds.GitDataSetCreation(self, data_set_type, path, commit_id, repository_id, dms, sample, experiment,
properties, dss_code, parents, data_set_code, contents).new_git_data_set()
def new_content_copy(self, path, commit_id, repository_id, edms_id, data_set_id):
"""
Create a content copy in an existing link data set.
:param path: path of the new content copy
"param commit_id: commit id of the new content copy
"param repository_id: repository id of the content copy
"param edms_id: Id of the external data managment system of the content copy
"param data_set_id: Id of the data set to which the new content copy belongs
"""
return pbds.GitDataSetUpdate(self, path, commit_id, repository_id, edms_id, data_set_id).new_content_copy()
Chandrasekhar Ramakrishnan
committed
@staticmethod
def sample_to_sample_id(sample):
"""Take sample which may be a string or object and return an identifier for it."""
return Openbis._object_to_object_id(sample, "as.dto.sample.id.SampleIdentifier", "as.dto.sample.id.SamplePermId");
@staticmethod
def experiment_to_experiment_id(experiment):
"""Take experiment which may be a string or object and return an identifier for it."""
return Openbis._object_to_object_id(experiment, "as.dto.experiment.id.ExperimentIdentifier", "as.dto.experiment.id.SamplePermId");
@staticmethod
def _object_to_object_id(obj, identifierType, permIdType):
object_id = None
if isinstance(obj, str):
if (is_identifier(obj)):
object_id = {
"identifier": obj,
"@type": identifierType
}
else:
object_id = {
"permId": obj,
"@type": permIdType
}
else:
object_id = {
"identifier": obj.identifier,
"@type": identifierType
return object_id
Chandrasekhar Ramakrishnan
committed
@staticmethod
def data_set_to_data_set_id(data_set):
if isinstance(data_set, str):
code = data_set
else:
code = data_set.permId
return {
"permId": code,
"@type": "as.dto.dataset.id.DataSetPermId"
}
def external_data_managment_system_to_dms_id(self, dms):
if isinstance(dms, str):
dms_id = {
"permId": dms,
"@type": "as.dto.externaldms.id.ExternalDmsPermId"
}
else:
dms_id = {
"identifier": dms.code,
"@type": "as.dto.sample.id.SampleIdentifier"
}
return dms_id
Swen Vermeul
committed
def new_sample(self, type, props=None, **kwargs):
""" Creates a new sample of a given sample type.
Swen Vermeul
committed
return Sample(self, self.get_sample_type(type), None, props, **kwargs)
new_object = new_sample # Alias
def new_dataset(self, type=None, files=None, props=None, folder=None, **kwargs):
""" Creates a new dataset of a given sample type.
"""
if files is None:
raise ValueError('please provide at least one file')
elif isinstance(files, str):
files = [files]
type_obj = self.get_dataset_type(type.upper())
return DataSet(self, type=type_obj, files=files, folder=folder, props=props, **kwargs)
def new_semantic_annotation(self, entityType=None, propertyType=None, **kwargs):
return SemanticAnnotation(
openbis_obj=self, isNew=True,
entityType=entityType, propertyType=propertyType, **kwargs
)
def _get_dss_url(self, dss_code=None):
""" internal method to get the downloadURL of a datastore.
"""
Swen Vermeul
committed
dss = self.get_datastores()
if dss_code is None:
return dss['downloadUrl'][0]
else:
Swen Vermeul
committed
return dss[dss['code'] == dss_code]['downloadUrl'][0]
Swen Vermeul
committed
def upload_files(self, datastore_url=None, files=None, folder=None, wait_until_finished=False):
Swen Vermeul
committed
if datastore_url is None:
if files is None:
raise ValueError("Please provide a filename.")
Swen Vermeul
committed
if folder is None:
# create a unique foldername
folder = time.strftime('%Y-%m-%d_%H-%M-%S')
if isinstance(files, str):
files = [files]
self.files = files
self.startByte = 0
self.endByte = 0
# define a queue to handle the upload threads
queue = DataSetUploadQueue()
real_files = []
for filename in files:
if os.path.isdir(filename):
real_files.extend(
[os.path.join(dp, f) for dp, dn, fn in os.walk(os.path.expanduser(filename)) for f in fn])
else:
real_files.append(os.path.join(filename))
# compose the upload-URL and put URL and filename in the upload queue
for filename in real_files:
file_in_wsp = os.path.join(folder, filename)
Swen Vermeul
committed
self.files_in_wsp.append(file_in_wsp)
upload_url = (
Swen Vermeul
committed
datastore_url + '/session_workspace_file_upload'
+ '?filename=' + os.path.join(folder, quote(filename))
+ '&id=1'
+ '&startByte=0&endByte=0'
+ '&sessionID=' + self.token
)
queue.put([upload_url, filename, self.verify_certificates])
# wait until all files have uploaded
if wait_until_finished:
queue.join()
# return files with full path in session workspace
Swen Vermeul
committed
return self.files_in_wsp
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592
2593
2594
2595
2596
2597
2598
2599
2600
2601
2602
2603
2604
def __init__(self, workers=20):
# maximum files to be uploaded at once
self.upload_queue = Queue()
# define number of threads and start them
for t in range(workers):
t = Thread(target=self.upload_file)
t.daemon = True
t.start()
def put(self, things):
""" expects a list [url, filename] which is put into the upload queue
"""
self.upload_queue.put(things)
def join(self):
""" needs to be called if you want to wait for all uploads to be finished
"""
self.upload_queue.join()
def upload_file(self):
while True:
# get the next item in the queue
upload_url, filename, verify_certificates = self.upload_queue.get()
filesize = os.path.getsize(filename)
# upload the file to our DSS session workspace
with open(filename, 'rb') as f:
resp = requests.post(upload_url, data=f, verify=verify_certificates)
resp.raise_for_status()
data = resp.json()
assert filesize == int(data['size'])
# Tell the queue that we are done
self.upload_queue.task_done()
Swen Vermeul
committed
2619
2620
2621
2622
2623
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
2640
def __init__(self, workers=20):
# maximum files to be downloaded at once
self.download_queue = Queue()
# define number of threads
for t in range(workers):
t = Thread(target=self.download_file)
t.daemon = True
t.start()
def put(self, things):
""" expects a list [url, filename] which is put into the download queue
"""
self.download_queue.put(things)
def join(self):
""" needs to be called if you want to wait for all downloads to be finished
"""
self.download_queue.join()
def download_file(self):
while True:
url, filename, file_size, verify_certificates = self.download_queue.get()
Swen Vermeul
committed
# create the necessary directory structure if they don't exist yet
os.makedirs(os.path.dirname(filename), exist_ok=True)
# request the file in streaming mode
r = requests.get(url, stream=True, verify=verify_certificates)
Swen Vermeul
committed
with open(filename, 'wb') as f:
for chunk in r.iter_content(chunk_size=1024):
if chunk: # filter out keep-alive new chunks
Swen Vermeul
committed
f.write(chunk)
assert os.path.getsize(filename) == int(file_size)
Swen Vermeul
committed
self.download_queue.task_done()
Swen Vermeul
committed
def __init__(self, openbis_obj, type, data=None, props=None, **kwargs):
self.__dict__['openbis'] = openbis_obj
self.__dict__['type'] = type
self.__dict__['p'] = PropertyHolder(openbis_obj, type)
self.__dict__['a'] = AttrHolder(openbis_obj, 'DataSet', type)
# existing OpenBIS object
if data is not None:
self._set_data(data)
Swen Vermeul
committed
if props is not None:
for key in props:
setattr(self.p, key, props[key])
if kwargs is not None:
for key in kwargs:
setattr(self, key, kwargs[key])
def __eq__(self, other):
return str(self) == str(other)
def __ne__(self, other):
return str(self) != str(other)
def _set_data(self, data):
# assign the attribute data to self.a by calling it
# (invoking the AttrHolder.__call__ function)
self.a(data)
self.__dict__['data'] = data
# put the properties in the self.p namespace (without checking them)
if 'properties' in data:
for key, value in data['properties'].items():
self.p.__dict__[key.lower()] = value
Swen Vermeul
committed
@property
def attrs(self):
return self.__dict__['a']
return self.openbis.get_project(self._project['identifier'])
except Exception:
pass
return self.openbis.get_experiment(self._experiment['identifier'])
except Exception:
pass
return self.openbis.get_sample(self._sample['identifier'])
object = sample # Alias
def __getattr__(self, name):
return getattr(self.__dict__['a'], name)
def __setattr__(self, name, value):
if name in ['set_properties', 'set_tags', 'add_tags']:
raise ValueError("These are methods which should not be overwritten")
setattr(self.__dict__['a'], name, value)
def _repr_html_(self):
"""Print all the assigned attributes (identifier, tags, etc.) in a nicely formatted table. See
AttributeHolder class.
"""
return self.a._repr_html_()
def __repr__(self):
"""same thing as _repr_html_() but for IPython
"""
return self.a.__repr__()
class LinkedData():
def __init__(self, data=None):
self.data = data if data is not None else []
self.attrs = ['externalCode', 'contentCopies']
def __dir__(self):
return self.attrs
def __getattr__(self, name):
if name in self.attrs:
if name in self.data:
return self.data[name]
else:
return ''
Swen Vermeul
committed
class PhysicalData():
def __init__(self, data=None):
if data is None:
data = []
self.data = data
self.attrs = ['speedHint', 'complete', 'shareId', 'size',
'fileFormatType', 'storageFormat', 'location', 'presentInArchive',
'storageConfirmation', 'locatorType', 'status']
Swen Vermeul
committed
2770
2771
2772
2773
2774
2775
2776
2777
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794
def __dir__(self):
return self.attrs
def __getattr__(self, name):
if name in self.attrs:
if name in self.data:
return self.data[name]
else:
return ''
def _repr_html_(self):
html = """
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th>attribute</th>
<th>value</th>
</tr>
</thead>
<tbody>
"""
for attr in self.attrs:
html += "<tr> <td>{}</td> <td>{}</td> </tr>".format(
attr, getattr(self, attr, '')
Swen Vermeul
committed
)
html += """
</tbody>
</table>
"""
return html
def __repr__(self):
headers = ['attribute', 'value']
lines = []
for attr in self.attrs:
lines.append([
attr,
getattr(self, attr, '')
])
return tabulate(lines, headers=headers)
class DataSet(OpenBisObject):
""" DataSet are openBIS objects that contain the actual files.
"""
def __init__(self, openbis_obj, type=type, data=None, files=None, folder=None, props=None, **kwargs):
Swen Vermeul
committed
super(DataSet, self).__init__(openbis_obj, type, data, props, **kwargs)
# existing DataSet
if data is not None:
if data['physicalData'] is None:
self.__dict__['shareId'] = None
self.__dict__['location'] = None
else:
self.__dict__['shareId'] = data['physicalData']['shareId']
self.__dict__['location'] = data['physicalData']['location']
# new DataSet
if files is not None:
self.__dict__['files'] = files
self.__dict__['folder'] = folder
def __str__(self):
return self.data['code']
def __dir__(self):
'props', 'get_parents()', 'get_children()',
'add_parents()', 'add_children()', 'del_parents()', 'del_children()',
'sample', 'experiment', 'physicalData', 'linkedData',
'tags', 'set_tags()', 'add_tags()', 'del_tags()',
'add_attachment()', 'get_attachments()', 'download_attachments()',
"get_files(start_folder='/')", 'file_list',
'download(files=None, destination=None, wait_until_finished=True)',
'status', 'archive()', 'unarchive()', 'data'
def __setattr__(self, name, value):
if name in ['folder']:
self.__dict__[name] = value
else:
super(DataSet, self).__setattr__(name, value)
@property
def props(self):
return self.__dict__['p']
@property
def type(self):
return self.__dict__['type']
@type.setter
def type(self, type_name):
dataset_type = self.openbis.get_dataset_type(type_name.upper())
self.p.__dict__['_type'] = dataset_type
self.a.__dict__['_type'] = dataset_type
Swen Vermeul
committed
@property
def physicalData(self):
if 'physicalData' in self.data:
return PhysicalData(self.data['physicalData'])
@property
def linkedData(self):
if 'linkedData' in self.data:
return LinkedData(self.data['linkedData'])
Swen Vermeul
committed
@property
def status(self):
ds = self.openbis.get_dataset(self.permId)
self.data['physicalData'] = ds.data['physicalData']
try:
return self.data['physicalData']['status']
except Exception:
return None
def archive(self, remove_from_data_store=True):
fetchopts = {
"removeFromDataStore": remove_from_data_store,
"@type": "as.dto.dataset.archive.DataSetArchiveOptions"
}
self.archive_unarchive('archiveDataSets', fetchopts)
if VERBOSE: print("DataSet {} archived".format(self.permId))
def unarchive(self):
fetchopts = {
"@type": "as.dto.dataset.unarchive.DataSetUnarchiveOptions"
}
self.archive_unarchive('unarchiveDataSets', fetchopts)
if VERBOSE: print("DataSet {} unarchived".format(self.permId))
def archive_unarchive(self, method, fetchopts):
dss = self.get_datastore
payload = {}
request = {
"method": method,
"params": [
self.openbis.token,
[{
"permId": self.permId,
"@type": "as.dto.dataset.id.DataSetPermId"
}],
dict(fetchopts)
],
}
resp = self.openbis._post_request(self._openbis.as_v3, request)
return
def set_properties(self, properties):
self.openbis.update_dataset(self.permId, properties=properties)
Swen Vermeul
committed
def download(self, files=None, destination=None, wait_until_finished=True, workers=10):
""" download the actual files and put them by default in the following folder:
__current_dir__/destination/dataset_permId/
If no files are specified, all files of a given dataset are downloaded.
If no destination is specified, the hostname is chosen instead.
Files are usually downloaded in parallel, using 10 workers by default. If you want to wait until
all the files are downloaded, set the wait_until_finished option to True.
Swen Vermeul
committed
"""
if files == None:
elif isinstance(files, str):
files = [files]
if destination is None:
destination = self.openbis.hostname
base_url = self.data['dataStore']['downloadUrl'] + '/datastore_server/' + self.permId + '/'
Swen Vermeul
committed
queue = DataSetDownloadQueue(workers=workers)
# get file list and start download
for filename in files:
file_info = self.get_file_list(start_folder=filename)
file_size = file_info[0]['fileSize']
download_url = base_url + filename + '?sessionID=' + self.openbis.token
filename_dest = os.path.join(destination, self.permId, filename)
queue.put([download_url, filename_dest, file_size, self.openbis.verify_certificates])
Swen Vermeul
committed
# wait until all files have downloaded
if wait_until_finished:
queue.join()
if VERBOSE: print("Files downloaded to: %s" % os.path.join(destination, self.permId))
@property
def folder(self):
return self.__dict__['folder']
def file_list(self):
"""returns the list of files including their directories as an array of strings. Just folders are not
listed.
"""
files = []
for file in self.get_file_list(recursive=True):
if file['isDirectory']:
pass
else:
files.append(file['pathInDataSet'])
return files
def get_files(self, start_folder='/'):
"""Returns a DataFrame of all files in this dataset
"""
def createRelativePath(pathInDataSet):
if self.shareId is None:
return ''
else:
return os.path.join(self.shareId, self.location, pathInDataSet)
def signed_to_unsigned(sig_int):
"""openBIS delivers crc32 checksums as signed integers.
If the number is negative, we just have to add 2**32
We display the hex number to match with the classic UI
"""
if sig_int < 0:
sig_int += 2 ** 32
return "%x" % (sig_int & 0xFFFFFFFF)
files = self.get_file_list(start_folder=start_folder)