Newer
Older
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
pybis.py
"""
import os
from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
Swen Vermeul
committed
import time
from datetime import datetime
from collections import namedtuple
import pandas as pd
from pandas import DataFrame, Series
Swen Vermeul
committed
import threading
from threading import Thread
from queue import Queue
DROPBOX_PLUGIN = "jupyter-uploader-api"
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
def _definitions(what):
entities = {
"Sample": {
"attrs": "space experiment project parents children container components tags".split(),
"ids2type": {
'parentIds': { 'permId': { '@type': 'as.dto.sample.id.SamplePermId' } },
'childIds': { 'permId': { '@type': 'as.dto.sample.id.SamplePermId' } },
'componentIds': { 'permId': {'@type': 'as.dto.sample.id.SamplePermId' } },
},
"identifier": "sampleId",
"up_method": "updateSamples",
"cre_method": "createSamples",
"cre_type": "as.dto.sample.create.SampleCreation",
"multi": "parents children components tags".split(),
},
"Experiment": {
"attrs": "project tags".split(),
"multi": "tags".split(),
"identifier": "experimentId",
"up_method": "updateExperimentss",
"cre_method": "createExperimentss",
},
"DataSet": {
"autoGeneratedCode" : True,
"attrs": "experiment sample parents children container components tags".split(),
"ids2type": {
'parentIds': { 'permId': { '@type': 'as.dto.dataset.id.DataSetPermId' } },
'childIds': { 'permId': { '@type': 'as.dto.dataset.id.DataSetPermId' } },
},
"multi": [],
"identifier": "dataSetId",
"up_method": "updateDataSets",
"cre_method": "createDataSets",
},
"attr2ids": {
"sample" : "sampleId",
"experiment" : "experimentId",
"space" : "spaceId",
"container" : "containerId",
"component" : "componentId",
"components" : "componentIds",
"parents" : "parentIds",
"children" : "childIds",
"project" : "projectId",
"tags" : "tagIds",
},
"ids2type": {
'spaceId': { 'permId': { '@type': 'as.dto.space.id.SpacePermId' } },
'projectId': { 'permId': { '@type': 'as.dto.project.id.ProjectPermId' } },
'experimentId': { 'permId': { '@type': 'as.dto.experiment.id.ExperimentPermId' } },
'tagIds': { 'code': { '@type': 'as.dto.tag.id.TagCode' } },
},
}
return entities[what]
search_for_type = {
"space": "as.dto.space.search.SpaceSearchCriteria",
"project": "as.dto.project.search.ProjectSearchCriteria",
"experiment": "as.dto.experiment.search.ExperimentSearchCriteria",
"code": "as.dto.common.search.CodeSearchCriteria",
"sample_type":"as.dto.sample.search.SampleTypeSearchCriteria",
"space": { "@type": "as.dto.space.fetchoptions.SpaceFetchOptions" },
"project": { "@type": "as.dto.project.fetchoptions.ProjectFetchOptions" },
"experiment": { "@type": "as.dto.experiment.fetchoptions.ExperimentFetchOptions" },
"sample": { "@type": "as.dto.sample.fetchoptions.SampleFetchOptions" },
"dataset": { "@type": "as.dto.dataset.fetchoptions.DataSetFetchOptions" },
"physicalData": { "@type": "as.dto.dataset.fetchoptions.PhysicalDataFetchOptions" },
"linkedData": { "@type": "as.dto.dataset.fetchoptions.LinkedDataFetchOptions" },
"properties": { "@type": "as.dto.property.fetchoptions.PropertyFetchOptions" },
"tags": { "@type": "as.dto.tag.fetchoptions.TagFetchOptions" },
"registrator": { "@type": "as.dto.person.fetchoptions.PersonFetchOptions" },
"modifier": { "@type": "as.dto.person.fetchoptions.PersonFetchOptions" },
"leader": { "@type": "as.dto.person.fetchoptions.PersonFetchOptions" },
"attachments": { "@type": "as.dto.attachment.fetchoptions.AttachmentFetchOptions" },
"history": { "@type": "as.dto.history.fetchoptions.HistoryEntryFetchOptions" },
"dataStore": { "@type": "as.dto.datastore.fetchoptions.DataStoreFetchOptions" },
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
def parse_jackson(input_json):
"""openBIS uses a library called «jackson» to automatically generate the JSON RPC output.
Objects that are found the first time are added an attribute «@id».
Any further findings only carry this reference id.
This function is used to dereference the output.
"""
interesting=['tags', 'registrator', 'modifier', 'type', 'parents',
'children', 'containers', 'properties', 'experiment', 'sample',
'project', 'space', 'propertyType'
]
found = {}
def build_cache(graph):
if isinstance(graph, list):
for item in graph:
build_cache(item)
elif isinstance(graph, dict) and len(graph) > 0:
for key, value in graph.items():
if key in interesting:
if isinstance(value, dict):
if '@id' in value:
found[value['@id']] = value
build_cache(value)
elif isinstance(value, list):
for item in value:
if isinstance(item, dict):
if '@id' in item:
found[item['@id']] = item
build_cache(item)
elif isinstance(value, dict):
build_cache(value)
elif isinstance(value, list):
build_cache(value)
def deref_graph(graph):
if isinstance(graph, list):
for item in graph:
deref_graph(item)
elif isinstance(graph, dict) and len(graph) > 0:
for key, value in graph.items():
if key in interesting:
if isinstance(value, dict):
deref_graph(value)
elif isinstance(value, int):
graph[key] = found[value]
elif isinstance(value, list):
for i, list_item in enumerate(value):
if isinstance(list_item, int):
value[i] = found[list_item]
elif isinstance(value, dict):
deref_graph(value)
elif isinstance(value, list):
deref_graph(value)
build_cache(input_json)
deref_graph(input_json)
def check_datatype(type_name, value):
if type_name == 'INTEGER':
return isinstance(value, int)
if type_name == 'BOOLEAN':
return isinstance(value, bool)
if type_name == 'VARCHAR':
return isinstance(value, str)
return True
def _determine_ident_type(ident):
# assume we got a sample identifier e.g. /TEST/TEST-SAMPLE
match = re.match('/', ident)
if match:
return "identifier"
else:
return "permId"
def search_request_for_identifier(ident, entity_type):
search_request = {}
itype = _determine_ident_type(ident)
if itype == "identifier":
search_request = {
"identifier": ident.upper(),
"@type": "as.dto.{}.id.{}Identifier".format(entity_type.lower(), entity_type.capitalize())
}
else:
search_request = {
"permId": ident,
"@type": "as.dto.{}.id.{}PermId".format(entity_type.lower(), entity_type.capitalize())
}
return search_request
def table_for_attributes(attributes):
table = '<table border="1" class="dataframe"><thead><tr style="text-align: right;"> <th>attribute</th> <th>value</th> </tr> </thead><tbody>'
for key, val in attributes.items():
table += '<tr><th>{}</th><td>{}</td></tr>'.format(key, val)
table += '</tbody></table>'
return table
def format_timestamp(ts):
return datetime.fromtimestamp(round(ts/1000)).strftime('%Y-%m-%d %H:%M:%S')
def extract_code(obj):
return obj['code']
def extract_deletion(obj):
del_objs = []
for deleted_object in obj['deletedObjects']:
del_objs.append({
"reason": obj['reason'],
"permId": deleted_object["id"]["permId"],
"type": deleted_object["id"]["@type"]
})
return del_objs
def extract_identifier(ident):
if not isinstance(ident, dict):
return str(ident)
return ident['identifier']
def extract_nested_identifier(ident):
if not isinstance(ident, dict):
return str(ident)
return ident['identifier']['identifier']
def extract_permid(permid):
if not isinstance(permid, dict):
return str(permid)
return permid['permId']
def extract_nested_permid(permid):
if not isinstance(permid, dict):
return str(permid)
return permid['permId']['permId']
def extract_property_assignments(pas):
pa_strings = []
for pa in pas:
if not isinstance(pa['propertyType'], dict):
pa_strings.append(pa['propertyType'])
else:
pa_strings.append(pa['propertyType']['label'])
return pa_strings
def extract_person(person):
if 'email' in person and person['email'] is not '':
return "%s %s <%s>" % (person['firstName'], person['lastName'], person['email'])
else:
return "%s %s" % (person['firstName'], person['lastName'])
def extract_properties(prop):
if isinstance(prop, dict):
newline = "; "
props = []
for key in prop:
props.append("%s: %s" % (key, prop[key]))
return newline.join(props)
def extract_tags(tags):
if isinstance(tags, dict):
tags = [tags]
new_tags = []
for tag in tags:
new_tags.append(tag["code"])
return new_tags
def extract_attachments(attachments):
att = []
for attachment in attachments:
att.append(attachment['fileName'])
return att
def signed_to_unsigned(sig_int):
"""openBIS delivers crc32 checksums as signed integers.
If the number is negative, we just have to add 2**32
We display the hex number to match with the classic UI
"""
if sig_int < 0:
sig_int += 2**32
return "%x"%(sig_int & 0xFFFFFFFF)
"""since Python3 the zlib module returns unsigned integers (2.7: signed int)
"""
prev = 0
for eachLine in open(fileName,"rb"):
prev = zlib.crc32(eachLine, prev)
# return as hex
return "%x"%(prev & 0xFFFFFFFF)
def _create_tagIds(tags=None):
if tags is None:
return None
tagIds = []
for tag in tags:
tagIds.append({ "code": tag, "@type": "as.dto.tag.id.TagCode" })
return tagIds
def _tagIds_for_tags(tags=None, action='Add'):
"""creates an action item to add or remove tags. Action is either 'Add', 'Remove' or 'Set'
"""
if tags is None:
return
if not isinstance(tags, list):
tags = [tags]
items = []
for tag in tags:
items.append({
"code": tag,
"@type": "as.dto.tag.id.TagCode"
})
tagIds = {
"actions": [
{
"items": items,
"@type": "as.dto.common.update.ListUpdateAction{}".format(action.capitalize())
}
],
"@type": "as.dto.common.update.IdListUpdateValue"
}
def _list_update(ids=None, entity=None, action='Add'):
"""creates an action item to add, set or remove ids.
"""
if ids is None:
return
if not isinstance(ids, list):
ids = [ids]
items = []
for ids in ids:
items.append({
"code": ids,
"@type": "as.dto.{}.id.{}Code".format(entity.lower(), entity)
})
list_update = {
"actions": [
{
"items": items,
"@type": "as.dto.common.update.ListUpdateAction{}".format(action.capitalize())
}
],
"@type": "as.dto.common.update.IdListUpdateValue"
}
return list_update
def _create_typeId(type):
return {
"permId": type.upper(),
"@type": "as.dto.entitytype.id.EntityTypePermId"
}
def _create_projectId(ident):
match = re.match('/', ident)
if match:
return {
"identifier": ident,
"@type": "as.dto.project.id.ProjectIdentifier"
}
else:
return {
"permId": ident,
"@type": "as.dto.project.id.ProjectPermId"
}
def _common_search(search_type, value, comparison="StringEqualToValue"):
sreq = {
"@type": search_type,
"fieldValue": {
"value": value,
"@type": "as.dto.common.search.{}".format(comparison)
}
}
return sreq
def _criteria_for_code(code):
return {
"fieldValue": {
"value": code.upper(),
"@type": "as.dto.common.search.StringEqualToValue"
},
"@type": "as.dto.common.search.CodeSearchCriteria"
}
def _subcriteria_for_type(code, entity_type):
return {
"@type": "as.dto.{}.search.{}TypeSearchCriteria".format(entity_type.lower(), entity_type),
"criteria": [
{
"@type": "as.dto.common.search.CodeSearchCriteria",
"fieldValue": {
"value": code.upper(),
"@type": "as.dto.common.search.StringEqualToValue"
}
}
]
}
def _split_identifier(ident):
bla = []
bla=ident.upper().split("/")
results = {}
try:
if bla[0] == '':
bla.pop(0)
if bla[-1] == '':
bla.pop(-1)
results["space"] = bla.pop(0)
results["code"] = bla.pop(-1)
results["experiment"] = bla.pop(0)
except Exception:
pass
return results
def _gen_search_request(req):
sreq = {}
for key, val in req.items():
if key == "criteria":
items = []
for item in req['criteria']:
items.append(_gen_search_request(item))
sreq['criteria'] = items
elif key == "code":
sreq["criteria"] = [_common_search(
"as.dto.common.search.CodeSearchCriteria", val.upper()
)]
elif key == "permid":
sreq["criteria"] = [_common_search(
"as.dto.common.search.PermIdSearchCriteria", val
)]
elif key == "identifier":
si = _split_identifier(val)
sreq["criteria"] = []
if "space" in si:
sreq["criteria"].append(
_gen_search_request({ "space": "Space", "code": si["space"] })
)
if "experiment" in si:
pass
if "code" in si:
sreq["criteria"].append(
_common_search(
"as.dto.common.search.CodeSearchCriteria", si["code"].upper()
)
)
elif key == "operator":
sreq["operator"] = val.upper()
else:
sreq["@type"] = "as.dto.{}.search.{}SearchCriteria".format(key, val)
return sreq
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
def _subcriteria_for_tags(tags):
if not isinstance(tags, list):
tags = [tags]
criterias = []
for tag in tags:
criterias.append({
"fieldName": "code",
"fieldType": "ATTRIBUTE",
"fieldValue": {
"value": tag,
"@type": "as.dto.common.search.StringEqualToValue"
},
"@type": "as.dto.common.search.CodeSearchCriteria"
})
return {
"@type": "as.dto.tag.search.TagSearchCriteria",
"operator": "AND",
"criteria": criterias
}
def _subcriteria_for_is_finished(is_finished):
return {
"@type": "as.dto.common.search.StringPropertySearchCriteria",
"fieldName": "FINISHED_FLAG",
"fieldType": "PROPERTY",
"fieldValue": {
"value": is_finished,
"@type": "as.dto.common.search.StringEqualToValue"
}
}
def _subcriteria_for_properties(prop, val):
return {
"@type": "as.dto.common.search.StringPropertySearchCriteria",
"fieldName": prop.upper(),
"fieldType": "PROPERTY",
"fieldValue": {
"value": val,
"@type": "as.dto.common.search.StringEqualToValue"
}
}
def _subcriteria_for_permid(permids, entity_type, parents_or_children='Parents'):
if not isinstance(permids, list):
permids = [permids]
criterias = []
for permid in permids:
criterias.append( {
"@type": "as.dto.common.search.PermIdSearchCriteria",
"fieldValue": {
"value": permid,
"@type": "as.dto.common.search.StringEqualToValue"
},
"fieldType": "ATTRIBUTE",
"fieldName": "code"
} )
criteria = {
"criteria": criterias,
"@type": "as.dto.{}.search.{}{}SearchCriteria".format(
entity_type.lower(), entity_type, parents_or_children
),
"operator": "OR"
}
return criteria
def _subcriteria_for_code(code, object_type):
criteria = {
"criteria": [
{
"fieldName": "code",
"fieldType": "ATTRIBUTE",
"fieldValue": {
"value": code.upper(),
"@type": "as.dto.common.search.StringEqualToValue"
},
"@type": "as.dto.common.search.CodeSearchCriteria"
}
],
"@type": search_for_type[object_type],
"operator": "AND"
}
crit = _gen_search_request({
object_type.lower() : object_type.capitalize(),
"operator": "AND",
"code": code
})
class Openbis:
"""Interface for communicating with openBIS. A current version of openBIS is needed.
(minimum version 16.05).
Swen Vermeul
committed
def __init__(self, url='https://localhost:8443', verify_certificates=True, token=None):
"""Initialize a new connection to an openBIS server.
"""
url_obj = urlparse(url)
if url_obj.netloc is None:
raise ValueError("please provide the url in this format: https://openbis.host.ch:8443")
self.url_obj = url_obj
self.url = url_obj.geturl()
self.port = url_obj.port
self.hostname = url_obj.hostname
self.as_v3 = '/openbis/openbis/rmi-application-server-v3.json'
self.as_v1 = '/openbis/openbis/rmi-general-information-v1.json'
self.reg_v1 = '/openbis/openbis/rmi-query-v1.json'
Chandrasekhar Ramakrishnan
committed
self.verify_certificates = verify_certificates
Swen Vermeul
committed
self.token = token
self.dataset_types = None
self.sample_types = None
Swen Vermeul
committed
self.files_in_wsp = []
Swen Vermeul
committed
self.token_path = None
# some default settings for working with samples etc.
self.default_space = None
self.default_project = None
self.default_experiment = None
self.default_sample_type = None
Swen Vermeul
committed
# use an existing token, if available
if self.token is None:
@property
def spaces(self):
return self.get_spaces()
@property
def projects(self):
return self.get_projects()
Swen Vermeul
committed
"""Read the token from the cache, and set the token ivar to it, if there, otherwise None.
If the token is not valid anymore, delete it.
"""
token_path = self.gen_token_path()
Chandrasekhar Ramakrishnan
committed
if not os.path.exists(token_path):
Swen Vermeul
committed
return None
Chandrasekhar Ramakrishnan
committed
try:
with open(token_path) as f:
Swen Vermeul
committed
token = f.read()
if not self.is_token_valid(token):
Chandrasekhar Ramakrishnan
committed
os.remove(token_path)
Swen Vermeul
committed
return None
else:
return token
except FileNotFoundError:
Swen Vermeul
committed
return None
Swen Vermeul
committed
def gen_token_path(self, parent_folder=None):
"""generates a path to the token file.
The token is usually saved in a file called
~/.pybis/hostname.token
"""
Chandrasekhar Ramakrishnan
committed
if parent_folder is None:
Swen Vermeul
committed
# save token under ~/.pybis folder
parent_folder = os.path.join(
os.path.expanduser("~"),
'.pybis'
)
path = os.path.join(parent_folder, self.hostname + '.token')
Chandrasekhar Ramakrishnan
committed
return path
Swen Vermeul
committed
def save_token(self, token=None, parent_folder=None):
""" saves the session token to the disk, usually here: ~/.pybis/hostname.token. When a new Openbis instance is created, it tries to read this saved token by default.
Swen Vermeul
committed
if token is None:
token = self.token
token_path = None;
if parent_folder is None:
token_path = self.gen_token_path()
else:
token_path = self.gen_token_path(parent_folder)
# create the necessary directories, if they don't exist yet
Chandrasekhar Ramakrishnan
committed
os.makedirs(os.path.dirname(token_path), exist_ok=True)
with open(token_path, 'w') as f:
Swen Vermeul
committed
f.write(token)
self.token_path = token_path
def delete_token(self, token_path=None):
Swen Vermeul
committed
if token_path is None:
token_path = self.token_path
os.remove(token_path)
def _post_request(self, resource, data):
""" internal method, used to handle all post requests and serializing / deserializing
data
"""
if "id" not in data:
data["id"] = "1"
if "jsonrpc" not in data:
data["jsonrpc"] = "2.0"
resp = requests.post(
self.url + resource,
json.dumps(data),
verify=self.verify_certificates
)
data = resp.json()
if 'error' in data:
raise ValueError('an error has occured: ' + data['error']['message'] )
elif 'result' in data:
return data['result']
else:
raise ValueError('request did not return either result nor error')
else:
raise ValueError('general error while performing post request')
""" Log out of openBIS. After logout, the session token is no longer valid.
if self.token is None:
return
logout_request = {
"method":"logout",
"params":[self.token],
}
resp = self._post_request(self.as_v3, logout_request)
Swen Vermeul
committed
self.token = None
self.token_path = None
Swen Vermeul
committed
def login(self, username=None, password=None, save_token=False):
"""Log into openBIS.
Expects a username and a password and updates the token (session-ID).
The token is then used for every request.
Chandrasekhar Ramakrishnan
committed
Clients may want to store the credentials object in a credentials store after successful login.
Throw a ValueError with the error message if login failed.
"""
login_request = {
"method":"login",
"params":[username, password],
}
result = self._post_request(self.as_v3, login_request)
if result is None:
raise ValueError("login to openBIS failed")
else:
self.token = result
Swen Vermeul
committed
if save_token:
self.save_token()
return self.token
def get_datastores(self):
""" Get a list of all available datastores. Usually there is only one, but in some cases
there might be more. If you upload a file, you need to specifiy the datastore you want
the file uploaded to.
"""
if len(self.datastores) == 0:
request = {
"method": "listDataStores",
"params": [ self.token ],
}
resp = self._post_request(self.as_v1, request)
if resp is not None:
self.datastores = DataFrame(resp)[['code','downloadUrl', 'hostUrl']]
return self.datastores
else:
raise ValueError("No datastore found!")
else:
return self.datastores
def get_spaces(self, code=None):
""" Get a list of all available spaces (DataFrame object). To create a sample or a
dataset, you need to specify in which space it should live.
"""
criteria = {}
options = {}
"params": [ self.token,
criteria,
options,
],
}
resp = self._post_request(self.as_v3, request)
if resp is not None:
spaces = DataFrame(resp['objects'])
spaces['registrationDate']= spaces['registrationDate'].map(format_timestamp)
spaces['modificationDate']= spaces['modificationDate'].map(format_timestamp)
sp = Things(
self,
'space',
spaces[['code', 'description', 'registrationDate', 'modificationDate']]
)
return sp
def get_space(self, spaceId):
""" Returns a Space object for a given identifier (spaceId).
"""
request = {
"method": "getSpaces",
"params": [
self.token,
[{
"@id": 0,
"permId": spaceId,
"@type": "as.dto.space.id.SpacePermId"
}],
{
"@id": 0,
"@type": "as.dto.space.fetchoptions.SpaceFetchOptions",
"registrator": None,
"samples": None,
"projects": None,
"sort": None
}
],
}
resp = self._post_request(self.as_v3, request)
if len(resp) == 0:
raise ValueError("No such space: %s" % spaceId)
return Space(self, resp[spaceId])
Chandrasekhar Ramakrishnan
committed
def get_samples(self, code=None, permId=None, space=None, project=None, experiment=None, type=None,
withParents=None, withChildren=None, **properties):
""" Get a list of all samples for a given space/project/experiment (or any combination)
"""
if space is None:
space = self.default_space
if project is None:
project = self.default_project
sub_criteria = []
if space:
sub_criteria.append(_gen_search_request({
"space": "Space",
"operator": "AND",
"code": space
})
)
exp_crit = _subcriteria_for_code(experiment, 'experiment')
proj_crit = _subcriteria_for_code(project, 'project')
exp_crit['criteria'] = []
exp_crit['criteria'].append(proj_crit)
sub_criteria.append(exp_crit)
sub_criteria.append(_subcriteria_for_code(experiment, 'experiment'))
if experiment is None:
experiment = self.default_experiment
if properties is not None:
for prop in properties:
sub_criteria.append(_subcriteria_for_properties(prop, properties[prop]))
if type:
sub_criteria.append(_subcriteria_for_code(type, 'sample_type'))
if code:
sub_criteria.append(_criteria_for_code(code))
if permId:
sub_criteria.append(_common_search("as.dto.common.search.PermIdSearchCriteria",permId))
if withParents:
if not isinstance(withParents, list):
withParents = [withParents]
for parent in withParents:
sub_criteria.append(
_gen_search_request({
"sample": "SampleParents",
"identifier": parent
})
)
if withChildren:
if not isinstance(withChildren, list):
withChildren = [withChildren]
for child in withChildren:
sub_criteria.append(
_gen_search_request({
"sample": "SampleChildren",
"identifier": child
})
)
criteria = {
"criteria": sub_criteria,
"@type": "as.dto.sample.search.SampleSearchCriteria",
"operator": "AND"
}
"properties": { "@type": "as.dto.property.fetchoptions.PropertyFetchOptions" },
"tags": { "@type": "as.dto.tag.fetchoptions.TagFetchOptions" },
"registrator": { "@type": "as.dto.person.fetchoptions.PersonFetchOptions" },
"modifier": { "@type": "as.dto.person.fetchoptions.PersonFetchOptions" },
"experiment": { "@type": "as.dto.experiment.fetchoptions.ExperimentFetchOptions" },
"type": { "@type": "as.dto.sample.fetchoptions.SampleTypeFetchOptions" },
"@type": "as.dto.sample.fetchoptions.SampleFetchOptions",
request = {
"method": "searchSamples",
"params": [ self.token,
criteria,
options,
],
}
resp = self._post_request(self.as_v3, request)
if resp is not None:
objects = resp['objects']
parse_jackson(objects)
samples = DataFrame(objects)
if len(samples) is 0:
raise ValueError("No samples found!")
samples['registrationDate']= samples['registrationDate'].map(format_timestamp)
samples['modificationDate']= samples['modificationDate'].map(format_timestamp)
samples['registrator'] = samples['registrator'].map(extract_person)
samples['modifier'] = samples['modifier'].map(extract_person)
samples['identifier'] = samples['identifier'].map(extract_identifier)
samples['permId'] = samples['permId'].map(extract_permid)
samples['experiment'] = samples['experiment'].map(extract_nested_identifier)
samples['sample_type'] = samples['type'].map(extract_nested_permid)
ss = samples[['identifier', 'permId', 'experiment', 'sample_type', 'registrator', 'registrationDate', 'modifier', 'modificationDate']]
return Things(self, 'sample', ss, 'identifier')
else:
raise ValueError("No samples found!")
def get_experiments(self, code=None, type=None, space=None, project=None, tags=None, is_finished=None, **properties):
""" Get a list of all experiment for a given space or project (or any combination)
"""
if space is None:
space = self.default_space
if project is None:
project = self.default_project
sub_criteria = []
if space:
sub_criteria.append(_subcriteria_for_code(space, 'space'))
sub_criteria.append(_subcriteria_for_code(project, 'project'))
sub_criteria.append(_criteria_for_code(code))
if type:
sub_criteria.append(_subcriteria_for_type(type, 'Experiment'))
if tags:
sub_criteria.append(_subcriteria_for_tags(tags))
if is_finished is not None:
sub_criteria.append(_subcriteria_for_is_finished(is_finished))
if properties is not None:
for prop in properties:
sub_criteria.append(_subcriteria_for_properties(prop, properties[prop]))
criteria = {
"criteria": sub_criteria,
"@type": "as.dto.experiment.search.ExperimentSearchCriteria",
"operator": "AND"
}
options = {
"properties": { "@type": "as.dto.property.fetchoptions.PropertyFetchOptions" },
"tags": { "@type": "as.dto.tag.fetchoptions.TagFetchOptions" },
"registrator": { "@type": "as.dto.person.fetchoptions.PersonFetchOptions" },
"modifier": { "@type": "as.dto.person.fetchoptions.PersonFetchOptions" },
"project": { "@type": "as.dto.project.fetchoptions.ProjectFetchOptions" },
"type": { "@type": "as.dto.experiment.fetchoptions.ExperimentTypeFetchOptions" },
"@type": "as.dto.experiment.fetchoptions.ExperimentFetchOptions"
}
request = {
"method": "searchExperiments",
"params": [ self.token,
criteria,
options,
],
}
resp = self._post_request(self.as_v3, request)
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
if len(resp['objects']) == 0:
raise ValueError("No experiments found!")
objects = resp['objects']
parse_jackson(objects)
experiments = DataFrame(objects)
experiments['registrationDate']= experiments['registrationDate'].map(format_timestamp)
experiments['modificationDate']= experiments['modificationDate'].map(format_timestamp)
experiments['project']= experiments['project'].map(extract_code)
experiments['registrator'] = experiments['registrator'].map(extract_person)
experiments['modifier'] = experiments['modifier'].map(extract_person)
experiments['identifier'] = experiments['identifier'].map(extract_identifier)
experiments['type'] = experiments['type'].map(extract_code)
exps = experiments[['code', 'identifier', 'project', 'type', 'registrator',
'registrationDate', 'modifier', 'modificationDate']]
return Things(self, 'experiment', exps, 'identifier')
def get_datasets(self, code=None, type=None, withParents=None, withChildren=None):
sub_criteria = []
if code:
sub_criteria.append(_criteria_for_code(code))
if type:
sub_criteria.append(_subcriteria_for_type(type, 'DataSet'))
if withParents:
sub_criteria.append(_subcriteria_for_permid(withParents, 'DataSet', 'Parents'))
if withChildren:
sub_criteria.append(_subcriteria_for_permid(withChildren, 'DataSet', 'Children'))
criteria = {
"criteria": sub_criteria,
"@type": "as.dto.dataset.search.DataSetSearchCriteria",
"operator": "AND"
}
fetchopts = {
# "parents": { "@type": "as.dto.dataset.fetchoptions.DataSetFetchOptions" },
# "children": { "@type": "as.dto.dataset.fetchoptions.DataSetFetchOptions" },
"containers": { "@type": "as.dto.dataset.fetchoptions.DataSetFetchOptions" },
"type": { "@type": "as.dto.dataset.fetchoptions.DataSetTypeFetchOptions" }
}
for option in ['tags', 'properties', 'sample']:
fetchopts[option] = fetch_option[option]
request = {
"method": "searchDataSets",
"params": [ self.token,
criteria,
fetchopts,
],
}
resp = self._post_request(self.as_v3, request)
if resp is not None:
objects = resp['objects']
parse_jackson(objects)
datasets = DataFrame(objects)
datasets['registrationDate']= datasets['registrationDate'].map(format_timestamp)
datasets['modificationDate']= datasets['modificationDate'].map(format_timestamp)
datasets['sample']= datasets['sample'].map(extract_nested_identifier)
datasets['type']= datasets['type'].map(extract_code)
ds = Things(
self,
'dataset',
datasets[['code', 'properties', 'type', 'sample', 'registrationDate', 'modificationDate']]
)
return ds
def get_experiment(self, expId):
""" Returns an experiment object for a given identifier (expId).
"""
fetchopts = {
"@type": "as.dto.experiment.fetchoptions.ExperimentFetchOptions"
}
search_request = search_request_for_identifier(expId, 'experiment')
for option in ['tags', 'properties', 'attachments', 'project']:
fetchopts[option] = fetch_option[option]
request = {
"method": "getExperiments",
"params": [
self.token,
[ search_request ],
fetchopts
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
],
}
resp = self._post_request(self.as_v3, request)
if len(resp) == 0:
raise ValueError("No such experiment: %s" % expId)
return Experiment(self, resp[expId])
def new_experiment(self, project_ident, code, type, properties=None, attachments=None, tags=None):
tagIds = _create_tagIds(tags)
typeId = _create_typeId(type)
projectId = _create_projectId(project_ident)
if properties is None:
properties = {}
request = {
"method": "createExperiments",
"params": [
self.token,
[
{
"properties": properties,
"code": code,
"typeId" : typeId,
"projectId": projectId,
"tagIds": tagIds,
"attachments": attachments,
"@type": "as.dto.experiment.create.ExperimentCreation",
}
]
],
}
resp = self._post_request(self.as_v3, request)
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
return self.get_experiment(resp[0]['permId'])
def update_experiment(self, experimentId, properties=None, tagIds=None, attachments=None):
params = {
"experimentId": {
"permId": experimentId,
"@type": "as.dto.experiment.id.ExperimentPermId"
},
"@type": "as.dto.experiment.update.ExperimentUpdate"
}
if properties is not None:
params["properties"]= properties
if tagIds is not None:
params["tagIds"] = tagIds
if attachments is not None:
params["attachments"] = attachments
request = {
"method": "updateExperiments",
"params": [
self.token,
[ params ]
]
}
self._post_request(self.as_v3, request)
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
def create_sample(self, space_ident, code, type,
project_ident=None, experiment_ident=None, properties=None, attachments=None, tags=None):
tagIds = _create_tagIds(tags)
typeId = _create_typeId(type)
projectId = _create_projectId(project_ident)
experimentId = _create_experimentId(experiment_ident)
if properties is None:
properties = {}
request = {
"method": "createSamples",
"params": [
self.token,
[
{
"properties": properties,
"code": code,
"typeId" : typeId,
"projectId": projectId,
"experimentId": experimentId,
"tagIds": tagIds,
"attachments": attachments,
"@type": "as.dto.sample.create.SampleCreation",
}
]
],
}
resp = self._post_request(self.as_v3, request)
return self.get_sample(resp[0]['permId'])
def update_sample(self, sampleId, space=None, project=None, experiment=None,
parents=None, children=None, components=None, properties=None, tagIds=None, attachments=None):
params = {
"sampleId": {
"permId": sampleId,
"@type": "as.dto.sample.id.SamplePermId"
},
"@type": "as.dto.sample.update.SampleUpdate"
}
if space is not None:
params['spaceId'] = space
if project is not None:
params['projectId'] = project
if properties is not None:
params["properties"]= properties
if tagIds is not None:
params["tagIds"] = tagIds
if attachments is not None:
params["attachments"] = attachments
request = {
"method": "updateSamples",
"params": [
self.token,
[ params ]
]
}
self._post_request(self.as_v3, request)
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
def delete_entity(self, what, permid, reason):
"""Deletes Spaces, Projects, Experiments, Samples and DataSets
"""
entity_type = "as.dto.{}.id.{}PermId".format(what.lower(), what.capitalize())
request = {
"method": "delete" + what.capitalize() + 's',
"params": [
self.token,
[
{
"permId": permid,
"@type": entity_type
}
],
{
"reason": reason,
"@type": "as.dto.{}.delete.{}DeletionOptions".format(what.lower(), what.capitalize())
}
]
}
self._post_request(self.as_v3, request)
def get_deletions(self):
request = {
"method": "searchDeletions",
"params": [
self.token,
{},
{
"deletedObjects": {
"@type": "as.dto.deletion.fetchoptions.DeletedObjectFetchOptions"
}
}
]
}
resp = self._post_request(self.as_v3, request)
objects = resp['objects']
parse_jackson(objects)
new_objs = []
for value in objects:
del_objs = extract_deletion(value)
if len(del_objs) > 0:
new_objs.append(*del_objs)
return DataFrame(new_objs)
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
def new_project(self, space_code, code, description, leaderId):
request = {
"method": "createProjects",
"params": [
self.token,
[
{
"code": code,
"spaceId": {
"permId": space_code,
"@type": "as.dto.space.id.SpacePermId"
},
"@type": "as.dto.project.create.ProjectCreation",
"description": description,
"leaderId": leaderId,
"attachments": None
}
]
],
}
resp = self._post_request(self.as_v3, request)
return resp
def get_project(self, projectId):
request = self._create_get_request('getProjects', 'project', projectId, ['attachments'])
resp = self._post_request(self.as_v3, request)
return resp
def get_projects(self, space=None):
""" Get a list of all available projects (DataFrame object).
"""
if space is None:
space = self.default_space
sub_criteria = []
if space:
sub_criteria.append(_subcriteria_for_code(space, 'space'))
criteria = {
"criteria": sub_criteria,
"@type": "as.dto.project.search.ProjectSearchCriteria",
"operator": "AND"
}
options = {
"registrator": { "@type": "as.dto.person.fetchoptions.PersonFetchOptions" },
"modifier": { "@type": "as.dto.person.fetchoptions.PersonFetchOptions" },
"experiments": { "@type": "as.dto.experiment.fetchoptions.ExperimentFetchOptions", },
"space": { "@type": "as.dto.space.fetchoptions.SpaceFetchOptions" },
"@type": "as.dto.project.fetchoptions.ProjectFetchOptions"
}
request = {
"method": "searchProjects",
"params": [ self.token,
criteria,
options,
],
}
resp = self._post_request(self.as_v3, request)
if resp is not None:
objects = resp['objects']
parse_jackson(objects)
projects = DataFrame(objects)
if len(projects) is 0:
raise ValueError("No projects found!")
projects['registrationDate']= projects['registrationDate'].map(format_timestamp)
projects['modificationDate']= projects['modificationDate'].map(format_timestamp)
projects['registrator'] = projects['registrator'].map(extract_person)
projects['modifier'] = projects['modifier'].map(extract_person)
projects['permid'] = projects['permId'].map(extract_permid)
projects['identifier'] = projects['identifier'].map(extract_identifier)
projects['space'] = projects['space'].map(extract_code)
pros=projects[['code', 'space', 'registrator', 'registrationDate',
'modifier', 'modificationDate', 'permid', 'identifier']]
return Things(self, 'project', pros, 'identifier')
else:
raise ValueError("No projects found!")
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
def _create_get_request(self, method_name, entity_type, permids, options):
if not isinstance(permids, list):
permids = [permids]
type = "as.dto.{}.id.{}".format(entity_type.lower(), entity_type.capitalize())
search_params = []
for permid in permids:
# decide if we got a permId or an identifier
match = re.match('/', permid)
if match:
search_params.append(
{ "identifier" : permid, "@type" : type + 'Identifier' }
)
else:
search_params.append(
{ "permId" : permid, "@type": type + 'PermId' }
)
fo = {}
for option in options:
fo[option] = fetch_option[option]
request = {
"method": method_name,
"params": [
self.token,
search_params,
fo
],
}
return request
def get_sample_types(self, type=None):
""" Returns a list of all available sample types
return self._get_types_of("searchSampleTypes", "Sample", type, ["generatedCodePrefix"])
def get_sample_type(self, type):
return self._get_types_of("searchSampleTypes", "Sample", type, ["generatedCodePrefix"])
def get_experiment_types(self, type=None):
""" Returns a list of all available experiment types
"""
return self._get_types_of("searchExperimentTypes", "Experiment", type)
def get_material_types(self, type=None):
""" Returns a list of all available material types
"""
return self._get_types_of("searchMaterialTypes", "Material", type)
def get_dataset_types(self, type=None):
""" Returns a list (DataFrame object) of all currently available dataset types
"""
return self._get_types_of("searchDataSetTypes", "DataSet", type)
def get_terms(self, vocabulary=None):
""" Returns information about vocabulary, including its controlled vocabulary
search_request = {}
if vocabulary is not None:
search_request = _gen_search_request( {
"vocabulary": "VocabularyTerm",
"criteria" : [{
"vocabulary": "Vocabulary",
"code": vocabulary
}]
})
fetch_options = {
"vocabulary" : { "@type" : "as.dto.vocabulary.fetchoptions.VocabularyFetchOptions" },
"@type": "as.dto.vocabulary.fetchoptions.VocabularyTermFetchOptions"
}
request = {
"method": "searchVocabularyTerms",
"params": [ self.token, search_request, fetch_options ]
}
resp = self._post_request(self.as_v3, request)
parse_jackson(resp)
return Vocabulary(resp)
def get_tags(self):
""" Returns a DataFrame of all
"""
request = {
"method": "searchTags",
"params": [ self.token, {}, {} ]
}
resp = self._post_request(self.as_v3, request)
parse_jackson(resp)
objects = DataFrame(resp['objects'])
objects['registrationDate'] = objects['registrationDate'].map(format_timestamp)
return objects[['code', 'registrationDate']]
def _get_types_of(self, method_name, entity_type, type=None, additional_attributes=[]):
""" Returns a list of all available experiment types
"""
attributes = ['code', 'description', *additional_attributes]
search_request = {}
fetch_options = {}
if type is not None:
search_request = _gen_search_request({
entity_type.lower(): entity_type.capitalize() + "Type",
"operator": "AND",
"code": type
})
fetch_options = {
"propertyAssignments" : {
"@type" : "as.dto.property.fetchoptions.PropertyAssignmentFetchOptions"
},
"@type": "as.dto.{}.fetchoptions.{}TypeFetchOptions".format(entity_type.lower(), entity_type)
}
attributes.append('propertyAssignments')
"params": [ self.token, search_request, fetch_options ],
}
resp = self._post_request(self.as_v3, request)
parse_jackson(resp)
if type is not None and len(resp['objects']) == 1:
return PropertyAssignments(self, resp['objects'][0])
if len(resp['objects']) >= 1:
types = DataFrame(resp['objects'])
return types[attributes]
Swen Vermeul
committed
def is_session_active(self):
""" checks whether a session is still active. Returns true or false.
"""
Swen Vermeul
committed
return self.is_token_valid(self.token)
def is_token_valid(self, token=None):
Chandrasekhar Ramakrishnan
committed
"""Check if the connection to openBIS is valid.
This method is useful to check if a token is still valid or if it has timed out,
requiring the user to login again.
Chandrasekhar Ramakrishnan
committed
:return: Return True if the token is valid, False if it is not valid.
"""
if token is None:
token = self.token
if token is None:
return False
request = {
"method": "isSessionActive",
"params": [ token ],
resp = self._post_request(self.as_v1, request)
"""fetch a dataset and some metadata attached to it:
- properties
- sample
- parents
- children
- containers
- dataStore
- physicalData
- linkedData
:return: a DataSet object
"""
criteria = [{
"permId": permid,
"@type": "as.dto.dataset.id.DataSetPermId"
}]
fetchopts = {
"parents": { "@type": "as.dto.dataset.fetchoptions.DataSetFetchOptions" },
"children": { "@type": "as.dto.dataset.fetchoptions.DataSetFetchOptions" },
"containers": { "@type": "as.dto.dataset.fetchoptions.DataSetFetchOptions" },
"@type": "as.dto.dataset.fetchoptions.DataSetFetchOptions",
}
for option in ['tags', 'properties', 'dataStore', 'physicalData', 'linkedData',
'experiment', 'sample']:
fetchopts[option] = fetch_option[option]
request = {
"params": [ self.token,
criteria,
fetchopts,
resp = self._post_request(self.as_v3, request)
if resp is not None:
for permid in resp:
return DataSet(self, resp[permid])
Chandrasekhar Ramakrishnan
committed
def get_sample(self, sample_ident, only_data=False):
Chandrasekhar Ramakrishnan
committed
"""Retrieve metadata for the sample.
Get metadata for the sample and any directly connected parents of the sample to allow access
to the same information visible in the ELN UI. The metadata will be on the file system.
:param sample_identifiers: A list of sample identifiers to retrieve.
"""
search_request = search_request_for_identifier(sample_ident, 'sample')
"type": {
"@type": "as.dto.sample.fetchoptions.SampleTypeFetchOptions"
},
"parents": { "@type": "as.dto.sample.fetchoptions.SampleFetchOptions" },
"children": { "@type": "as.dto.sample.fetchoptions.SampleFetchOptions" },
"experiment": { "@type": "as.dto.experiment.fetchoptions.ExperimentFetchOptions" },
"dataSets": {
"@type": "as.dto.dataset.fetchoptions.DataSetFetchOptions",
"properties": {
"@type": "as.dto.property.fetchoptions.PropertyFetchOptions"
},
"type": {
"@type": "as.dto.dataset.fetchoptions.DataSetTypeFetchOptions"
},
},
"space": fetch_option['space'],
"properties": fetch_option['properties'],
"registrator": fetch_option['registrator'],
"tags": fetch_option['tags'],
}
sample_request = {
"method": "getSamples",
"params": [
self.token,
[ search_request ],
resp = self._post_request(self.as_v3, sample_request)
parse_jackson(resp)
if resp is None or len(resp) == 0:
raise ValueError('no such sample found: '+sample_ident)
else:
for sample_ident in resp:
if only_data:
return resp[sample_ident]
else:
return Sample(self, self.get_sample_type(resp[sample_ident]["type"]["code"]), resp[sample_ident])
def new_space(self, name, description=None):
""" Creates a new space in the openBIS instance. Returns a list of all spaces
"""
request = {
"method": "createSpaces",
"params": [
self.token,
[ {
"@id": 0,
"code": name,
"description": description,
"@type": "as.dto.space.create.SpaceCreation"
} ]
],
}
resp = self._post_request(self.as_v3, request)
return self.get_spaces(refresh=True)
Swen Vermeul
committed
def new_analysis(self, name, description=None, sample=None, dss_code=None, result_files=None,
notebook_files=None, parents=[]):
""" An analysis contains the Jupyter notebook file(s) and some result files.
Technically this method involves uploading files to the session workspace
and activating the dropbox aka dataset ingestion service "jupyter-uploader-api"
Swen Vermeul
committed
"""
if dss_code is None:
dss_code = self.get_datastores()['code'][0]
# if a sample identifier was given, use it as a string.
# if a sample object was given, take its identifier
# TODO: handle permId's
sample_identifier = None
if isinstance(sample, str):
sample_identifier = sample
else:
sample_identifier = sample.ident
Swen Vermeul
committed
folder = time.strftime('%Y-%m-%d_%H-%M-%S')
Swen Vermeul
committed
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
data_sets = []
if notebook_files is not None:
notebooks_folder = os.path.join(folder, 'notebook_files')
self.upload_files(
datastore_url = datastore_url,
files=notebook_files,
folder= notebooks_folder,
wait_until_finished=True
)
data_sets.append({
"dataSetType" : "JUPYTER_NOTEBOOk",
"sessionWorkspaceFolder": notebooks_folder,
"fileNames" : notebook_files,
"properties" : {}
})
if result_files is not None:
results_folder = os.path.join(folder, 'result_files')
self.upload_files(
datastore_url = datastore_url,
files=result_files,
folder=results_folder,
wait_until_finished=True
)
data_sets.append({
"dataSetType" : "JUPYTER_RESULT",
"sessionWorkspaceFolder" : results_folder,
"fileNames" : result_files,
"properties" : {}
})
request = {
"method": "createReportFromAggregationService",
"params": [
self.token,
dss_code,
Swen Vermeul
committed
DROPBOX_PLUGIN,
Swen Vermeul
committed
"identifier" : sample.identifier
Swen Vermeul
committed
"containers" : [
{
"dataSetType" : "JUPYTER_CONTAINER",
"properties" : {
"NAME" : name,
"DESCRIPTION" : description
Swen Vermeul
committed
}
Swen Vermeul
committed
"dataSets" : data_sets,
"parents" : parents,
resp = self._post_request(self.reg_v1, request)
try:
if resp['rows'][0][0]['value'] == 'OK':
return resp['rows'][0][1]['value']
except:
return resp
Swen Vermeul
committed
def new_sample(self, type, **kwargs):
""" Creates a new sample of a given sample type.
return Sample(self, self.get_sample_type(type), None, **kwargs)
def _get_dss_url(self, dss_code=None):
""" internal method to get the downloadURL of a datastore.
"""
Swen Vermeul
committed
dss = self.get_datastores()
if dss_code is None:
return dss['downloadUrl'][0]
else:
Swen Vermeul
committed
return dss[dss['code'] == dss_code]['downloadUrl'][0]
Swen Vermeul
committed
def upload_files(self, datastore_url=None, files=None, folder=None, wait_until_finished=False):
Swen Vermeul
committed
if datastore_url is None:
if files is None:
raise ValueError("Please provide a filename.")
Swen Vermeul
committed
if folder is None:
# create a unique foldername
folder = time.strftime('%Y-%m-%d_%H-%M-%S')
if isinstance(files, str):
files = [files]
self.files = files
self.startByte = 0
self.endByte = 0
# define a queue to handle the upload threads
queue = DataSetUploadQueue()
real_files = []
for filename in files:
if os.path.isdir(filename):
real_files.extend([os.path.join(dp, f) for dp, dn, fn in os.walk(os.path.expanduser(filename)) for f in fn])
else:
real_files.append(os.path.join(filename))
# compose the upload-URL and put URL and filename in the upload queue
for filename in real_files:
file_in_wsp = os.path.join(folder, filename)
Swen Vermeul
committed
self.files_in_wsp.append(file_in_wsp)
upload_url = (
Swen Vermeul
committed
datastore_url + '/session_workspace_file_upload'
+ '?filename=' + os.path.join(folder,filename)
+ '&id=1'
+ '&startByte=0&endByte=0'
+ '&sessionID=' + self.token
)
queue.put([upload_url, filename, self.verify_certificates])
# wait until all files have uploaded
if wait_until_finished:
queue.join()
# return files with full path in session workspace
Swen Vermeul
committed
return self.files_in_wsp
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
class DataSetUploadQueue:
def __init__(self, workers=20):
# maximum files to be uploaded at once
self.upload_queue = Queue()
# define number of threads and start them
for t in range(workers):
t = Thread(target=self.upload_file)
t.daemon = True
t.start()
def put(self, things):
""" expects a list [url, filename] which is put into the upload queue
"""
self.upload_queue.put(things)
def join(self):
""" needs to be called if you want to wait for all uploads to be finished
"""
self.upload_queue.join()
def upload_file(self):
while True:
# get the next item in the queue
upload_url, filename, verify_certificates = self.upload_queue.get()
filesize = os.path.getsize(filename)
# upload the file to our DSS session workspace
with open(filename, 'rb') as f:
resp = requests.post(upload_url, data=f, verify=verify_certificates)
resp.raise_for_status()
data = resp.json()
assert filesize == int(data['size'])
# Tell the queue that we are done
self.upload_queue.task_done()
Swen Vermeul
committed
class DataSetDownloadQueue:
def __init__(self, workers=20):
# maximum files to be downloaded at once
self.download_queue = Queue()
# define number of threads
for t in range(workers):
t = Thread(target=self.download_file)
t.daemon = True
t.start()
def put(self, things):
""" expects a list [url, filename] which is put into the download queue
"""
self.download_queue.put(things)
Swen Vermeul
committed
def join(self):
""" needs to be called if you want to wait for all downloads to be finished
"""
self.download_queue.join()
def download_file(self):
while True:
url, filename, file_size, verify_certificates = self.download_queue.get()
Swen Vermeul
committed
# create the necessary directory structure if they don't exist yet
os.makedirs(os.path.dirname(filename), exist_ok=True)
# request the file in streaming mode
r = requests.get(url, stream=True, verify=verify_certificates)
Swen Vermeul
committed
with open(filename, 'wb') as f:
for chunk in r.iter_content(chunk_size=1024):
if chunk: # filter out keep-alive new chunks
f.write(chunk)
assert os.path.getsize(filename) == int(file_size)
Swen Vermeul
committed
self.download_queue.task_done()
class DataSet():
""" DataSet are openBIS objects that contain the actual files.
"""
def __init__(self, openbis_obj, data):
self.data = data
self.permid = data["code"]
self.permId = data["code"]
if data['physicalData'] is None:
self.shareId = None
self.location = None
else:
self.shareId = data['physicalData']['shareId']
self.location = data['physicalData']['location']
def _repr_html_(self):
html = """
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th>attribute</th>
<th>value</th>
</tr>
</thead>
<tbody>
<tr> <th>permId</th> <td>{}</td> </tr>
<tr> <th>properties</th> <td>{}</td> </tr>
<tr> <th>tags</th> <td>{}</td> </tr>
</tbody>
</table>
"""
return html.format(self.permid, self.data['properties'], self.data['tags'])
Swen Vermeul
committed
def download(self, files=None, wait_until_finished=True, workers=10):
""" download the actual files and put them by default in the following folder:
Swen Vermeul
committed
__current_dir__/hostname/dataset_permid/
If no files are specified, all files of a given dataset are downloaded.
Files are usually downloaded in parallel, using 10 workers by default. If you want to wait until
all the files are downloaded, set the wait_until_finished option to True.
Swen Vermeul
committed
"""
if files == None:
files = self.file_list()
elif isinstance(files, str):
files = [files]
base_url = self.data['dataStore']['downloadUrl'] + '/datastore_server/' + self.permid + '/'
Swen Vermeul
committed
queue = DataSetDownloadQueue(workers=workers)
# get file list and start download
for filename in files:
file_info = self.get_file_list(start_folder=filename)
file_size = file_info[0]['fileSize']
download_url = base_url + filename + '?sessionID=' + self.openbis.token
filename = os.path.join(self.openbis.hostname, self.permid, filename)
queue.put([download_url, filename, file_size, self.openbis.verify_certificates])
Swen Vermeul
committed
# wait until all files have downloaded
if wait_until_finished:
queue.join()
print("Files downloaded to: %s" % os.path.join(self.openbis.hostname, self.permid))
def get_parents(self):
return self.openbis.get_datasets(withChildren=self.permid)
def get_children(self):
return self.openbis.get_datasets(withParents=self.permid)
def file_list(self):
files = []
for file in self.get_file_list(recursive=True):
if file['isDirectory']:
pass
else:
files.append(file['pathInDataSet'])
return files
def get_files(self, start_folder='/'):
""" Returns a DataFrame of all files in this dataset
"""
def createRelativePath(pathInDataSet):
if self.shareId is None:
return ''
else:
return os.path.join(self.shareId, self.location, pathInDataSet)
files = self.get_file_list(start_folder=start_folder)
df = DataFrame(files)
df['relativePath'] = df['pathInDataSet'].map(createRelativePath)
df['crc32Checksum'] = df['crc32Checksum'].fillna(0.0).astype(int).map(signed_to_unsigned)
return df[['isDirectory', 'pathInDataSet', 'fileSize', 'crc32Checksum']]
def get_file_list(self, recursive=True, start_folder="/"):
""" Lists all files of a given dataset. You can specifiy a start_folder other than "/".
By default, all directories and their containing files are listed recursively. You can
turn off this option by setting recursive=False.
"""
request = {
"method" : "listFilesForDataSet",
"params" : [
self.openbis.token,
self.permid,
"id":"1"
self.data["dataStore"]["downloadUrl"] + '/datastore_server/rmi-dss-api-v1.json',
json.dumps(request),
verify=self.openbis.verify_certificates
)
data = resp.json()
if 'error' in data:
Swen Vermeul
committed
raise ValueError('Error from openBIS: ' + data['error'] )
elif 'result' in data:
return data['result']
Swen Vermeul
committed
raise ValueError('request to openBIS did not return either result nor error')
Swen Vermeul
committed
raise ValueError('internal error while performing post request')
class Vocabulary():
def __init__(self, data):
self.data = data
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
@property
def terms_kv(self):
return [
{voc["code"]:voc["label"] }
for voc
in sorted(self.data['objects'], key=lambda v: v["ordinal"])
]
@property
def terms(self):
return [
voc["code"]
for voc
in sorted(self.data['objects'], key=lambda v: v["ordinal"])
]
def _repr_html_(self):
html = """
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th>vocabulary term</th>
<th>label</th>
<th>description</th>
<th>vocabulary</th>
</tr>
</thead>
<tbody>
"""
for voc in sorted(
self.data['objects'],
key=lambda v: (v["permId"]["vocabularyCode"], v["ordinal"])
):
html += "<tr> <td>{}</td> <td>{}</td> <td>{}</td> <td>{}</td> </tr>".format(
voc['code'],
voc['label'],
voc['description'],
voc['permId']['vocabularyCode']
)
html += """
</tbody>
</table>
"""
return html
class PropertyHolder():
def __init__(self, openbis_obj, type):
self.__dict__['_openbis'] = openbis_obj
self.__dict__['_type'] = type
self.__dict__['_property_names'] = []
for prop in type.data['propertyAssignments']:
self._property_names.append(prop['propertyType']['code'].lower())
def _get_terms(self, vocabulary):
return self._openbis.get_terms(vocabulary)
def _all_props(self):
props = {}
for code in self._type.codes():
props[code] = getattr(self, code)
return props
def __getattr__(self, name):
if name.endswith('_'):
name = name.rstrip('_')
property_type = self._type.prop[name]['propertyType']
if property_type['dataTypeCode'] == 'CONTROLLEDVOCABULARY':
return self._openbis.get_terms(name)
else:
return { property_type["label"] : property_type["dataTypeCode"]}
else: return None
def __setattr__(self, name, value):
if name not in self._property_names:
raise KeyError("No such property: {}".format(name))
property_type = self._type.prop[name]['propertyType']
data_type = property_type['dataTypeCode']
if data_type == 'CONTROLLEDVOCABULARY':
voc = self._openbis.get_terms(name)
if value not in voc.terms:
raise ValueError("Value must be one of these terms: " + ", ".join(voc.terms))
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
elif data_type in ('INTEGER', 'BOOLEAN', 'VARCHAR'):
if not check_datatype(data_type, value):
raise ValueError("Value must be of type {}".format(data_type))
self.__dict__[name] = value
def __dir__(self):
return self._property_names
def _repr_html_(self):
html = """
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th>property</th>
<th>value</th>
</tr>
</thead>
<tbody>
"""
for prop in self._property_names:
value = ''
try:
value = getattr(self, prop)
except Exception:
pass
html += "<tr> <td>{}</td> <td>{}</td> </tr>".format(
prop,
value
)
html += """
</tbody>
</table>
"""
return html
class AttrHolder():
""" General class for both samples and experiments that hold all common attributes, such as:
- space
- experiment
- parents
- children
- tags
"""
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
def __init__(self, openbis_obj, type):
self.__dict__['_openbis'] = openbis_obj
self.__dict__['_type_obj'] = type
entity_type = type.data['@type'].split('.')[2].capitalize()
self.__dict__['_entity_type'] = entity_type
self.__dict__['_allowed_attrs'] = _definitions(entity_type)['attrs']
self.__dict__['_identifier'] = None
self.__dict__['_is_new'] = True
def __call__(self, data):
self.__dict__['_is_new'] = False
for key in "code permId identifier type container components attachments".split():
self.__dict__['_'+key] = data.get(key, None)
for key in "space project experiment".split():
d = data.get(key, None)
if d is not None:
d = d['permId']
self.__dict__['_'+key] = d
for key in "parents children".split():
self.__dict__['_'+key] = []
for item in data[key]:
self.__dict__['_'+key].append(item['identifier'])
for key in "tags".split():
self.__dict__['_'+key] = []
for item in data[key]:
self.__dict__['_'+key].append({
"code": item['code'],
"@type": "as.dto.tag.id.TagCode"
})
def _all_attrs(self):
defs = _definitions(self._entity_type)
attr2ids = _definitions('attr2ids')
ids2type = _definitions('ids2type')
request = {}
for attr in defs['attrs']:
# only continue if attribute is actually defined
if '_'+attr in self.__dict__:
# handle multivalue attributes (parents, children, tags etc.)
if attr in defs['multi']:
items = self.__dict__.get('_'+attr, [])
if items == None:
items = []
request[attr2ids[attr]] = {
"actions": [
{
"items": items,
"@type": "as.dto.common.update.ListUpdateActionSet",
}
],
"@type": "as.dto.common.update.IdListUpdateValue"
}
else:
ids = attr2ids[attr]
request[ids] = self.__dict__.get('_'+attr, None)
if self.__dict__.get('_code', None) is None:
request['autoGeneratedCode'] = True
else:
pass
return request
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
def __getattr__(self, name):
""" handles all attribute requests dynamically. Values are returned in a sensible way,
for example the identifiers of parents, children and components are returned
as an array of values.
"""
int_name = '_'+name
if int_name in self.__dict__:
if int_name in ["_experiment"]:
exp = ''
try:
exp = self.__dict__[int_name]['identifier']['identifier']
except TypeError:
pass
return exp
elif isinstance(self.__dict__[int_name], list):
values = []
for item in self.__dict__[int_name]:
values.append(
item.get("code",
item.get("identifier",
item.get("permId", None)))
)
return values
elif isinstance(self.__dict__[int_name], dict):
return self.__dict__[int_name].get(name,
self.__dict__[int_name].get("code",
self.__dict__[int_name].get("identifier",
self.__dict__[int_name].get("permId", None))))
else:
return self.__dict__[int_name]
else:
return None
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
def __setattr__(self, name, value):
if name in ["parents", "children", "components"]:
if not isinstance(value, list):
value = [value]
objs = []
for val in value:
# fetch objects in openBIS, make sure they actually exists
obj = getattr(self._openbis, 'get_'+self._entity_type.lower())(val)
objs.append(obj)
self.__dict__['_'+name] = {
"@type": "as.dto.common.update.IdListUpdateValue",
"actions": [{
"@type": "as.dto.common.update.ListUpdateActionSet",
"items": [item._permId for item in objs]
}]
}
elif name in ["tags"]:
tags = []
for val in value:
tags.append({
"@type": "as.dto.tag.id.TagCode",
"code": val
})
self.__dict__['_tags'] = tags
elif name in ["space", "project", "experiment"]:
# fetch object in openBIS, make sure it actually exists
obj = getattr(self._openbis, "get_"+name)(value)
self.__dict__['_'+name] = obj['permId']
# mark attribute as modified, if it's an existing entity
if self.__dict__['_is_new']:
pass
else:
self.__dict__['_'+name]['is_modified'] = True
elif name in ["identifier", "project"]:
raise KeyError("you can not modify the {}".format(name))
elif name == "code":
if self.__dict__['_type_obj'].data['autoGeneratedCode']:
raise KeyError("for this {}Type you can not set a code".format(self.__dict__['_entity_type']))
else:
self.__dict__['_code'] = value
else:
raise KeyError("no such attribute: {}".format(name))
def get_type(self):
return self._type_obj
def get_parents(self):
return getattr(self._openbis, 'get_'+self._entity_type.lower()+'s')(withChildren=self.identifier)
def get_children(self):
return getattr(self._openbis, 'get_'+self._entity_type.lower()+'s')(withParents=self.identifier)
@property
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
def set_tags(self, tags):
tagIds = _tagIds_for_tags(tags, 'Set')
self.openbis.update_sample(self.permId, tagIds=tagIds)
def add_tags(self, tags):
tagIds = _tagIds_for_tags(tags, 'Add')
self.openbis.update_sample(self.permId, tagIds=tagIds)
def del_tags(self, tags):
tagIds = _tagIds_for_tags(tags, 'Remove')
self.openbis.update_sample(self.permId, tagIds=tagIds)
def _repr_html_(self):
html = """
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th>attribute</th>
<th>value</th>
</tr>
</thead>
<tbody>
"""
for key in ["identifier","permId", "code", "type", "space","project", "experiment", "parents", "children", "tags"]:
html += "<tr> <td>{}</td> <td>{}</td> </tr>".format(key, getattr(self, key, None))
html += """
</tbody>
</table>
"""
return html
class Sample():
""" A Sample is one of the most commonly used objects in openBIS.
"""
def __init__(self, openbis_obj, type, data=None, **kwargs):
self.__dict__['openbis'] = openbis_obj
self.__dict__['type'] = type
self.__dict__['p'] = PropertyHolder(openbis_obj, type)
self.__dict__['a'] = AttrHolder(openbis_obj, type)
if data is not None:
self._set_data(data)
if kwargs is not None:
for key in kwargs:
setattr(self, key, kwargs[key])
def _set_data(self, data):
# assign the attribute data to self.a by calling it
# (invoking the AttrHolder.__call__ function)
self.a(data)
self.__dict__['data'] = data
# put the properties in the self.p namespace (without checking them)
for key, value in data['properties'].items():
self.p.__dict__[key.lower()] = value
@property
def type(self):
return self.__dict__['type'].data['code']
@type.setter
def type(self, type_name):
sample_type = self.openbis.get_sample_type(type_name)
self.p.__dict__['_type'] = sample_type
self.a.__dict__['_type'] = sample_type
def __getattr__(self, name):
return getattr(self.__dict__['a'], name)
def __setattr__(self, name, value):
if name in ['set_properties', 'set_tags', 'add_tags']:
raise ValueError("These are methods which should not be overwritten")
setattr(self.__dict__['a'], name, value)
def _repr_html_(self):
html = self.a._repr_html_()
return html
def set_properties(self, properties):
self.openbis.update_sample(self.permId, properties=properties)
def save(self):
props = self.p._all_props()
attrs = self.a._all_attrs()
attrs["properties"] = props
if self.identifier is None:
# create a new sample
attrs["@type"] = "as.dto.sample.create.SampleCreation"
attrs["typeId"] = self.__dict__['type'].data['permId']
request = {
"method": "createSamples",
"params": [ self.openbis.token,
[ attrs ]
]
}
resp = self.openbis._post_request(self.openbis.as_v3, request)
new_sample_data = self.openbis.get_sample(resp[0]['permId'], only_data=True)
self._set_data(new_sample_data)
return self
else:
attrs["@type"] = "as.dto.sample.update.SampleUpdate"
attrs["sampleId"] = {
"permId": self.permId,
"@type": "as.dto.sample.id.SamplePermId"
}
request = {
"method": "updateSamples",
"params": [ self.openbis.token,
[ attrs ]
]
}
resp = self.openbis._post_request(self.openbis.as_v3, request)
print('Sample successfully updated')
self.openbis.delete_entity('sample', self.permId, reason)
def get_datasets(self):
parse_jackson(objects)
datasets = DataFrame(objects)
datasets['registrationDate'] = datasets['registrationDate'].map(format_timestamp)
datasets['properties'] = datasets['properties'].map(extract_properties)
datasets['type'] = datasets['type'].map(extract_code)
return datasets
class Space(dict):
""" managing openBIS spaces
"""
def __init__(self, openbis_obj, *args, **kwargs):
super(Space, self).__init__(*args, **kwargs)
self.__dict__ = self
self.openbis = openbis_obj
""" Lists all samples in a given space. A pandas DataFrame object is returned.
"""
return self.openbis.get_samples(space=self.code, *args, **kwargs)
@property
def projects(self):
return self.openbis.get_projects(space=self.code)
def new_project(self, **kwargs):
return self.openbis.new_project(space=self.code, **kwargs)
@property
def experiments(self):
return self.openbis.get_experiments(space=self.code)
class Things():
"""An object that contains a DataFrame object about an entity available in openBIS.
"""
def __init__(self, openbis_obj, what, df, identifier_name='code'):
self.openbis = openbis_obj
self.what = what
self.df = df
self.identifier_name = identifier_name
def _repr_html_(self):
return self.df._repr_html_()
def __getitem__(self, key):
if self.df is not None and len(self.df) > 0:
row = None
if isinstance(key, int):
# get thing by rowid
row = self.df.loc[[key]]
elif isinstance(key, list):
# treat it as a normal dataframe
return self.df[key]
else:
# get thing by code
row = self.df[self.df[self.identifier_name]==key.upper()]
if row is not None:
# invoke the openbis.get_what() method
return getattr(self.openbis, 'get_'+self.what)(row[self.identifier_name].values[0])
class Experiment():
""" managing openBIS experiments
"""
def __init__(self, openbis_obj, data):
self.permId = data['permId']['permId']
self.identifier = data['identifier']['identifier']
self.properties = data['properties']
self.tags = extract_tags(data['tags'])
self.attachments = extract_attachments(data['attachments'])
self.project = data['project']['code']
self.data = data
def set_properties(self, properties):
self.openbis.update_experiment(self.permId, properties=properties)
def set_tags(self, tags):
tagIds = _tagIds_for_tags(tags, 'Set')
self.openbis.update_experiment(self.permId, tagIds=tagIds)
def add_tags(self, tags):
tagIds = _tagIds_for_tags(tags, 'Add')
self.openbis.update_experiment(self.permId, tagIds=tagIds)
def del_tags(self, tags):
tagIds = _tagIds_for_tags(tags, 'Remove')
self.openbis.update_experiment(self.permId, tagIds=tagIds)
def delete(self, reason):
self.openbis.delete_entity('experiment', self.permId, reason)
def _repr_html_(self):
html = """
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th>attribute</th>
<th>value</th>
</tr>
</thead>
<tbody>
<tr> <th>permId</th> <td>{}</td> </tr>
<tr> <th>identifier</th> <td>{}</td> </tr>
<tr> <th>project</th> <td>{}</td> </tr>
<tr> <th>properties</th> <td>{}</td> </tr>
<tr> <th>tags</th> <td>{}</td> </tr>
<tr> <th>attachments</th> <td>{}</td> </tr>
</tbody>
</table>
"""
return html.format(self.permId, self.identifier, self.project, self.properties, self.tags, self.attachments)
data["identifier"] = self.identifier
data["permId"] = self.permId
data["properties"] = self.properties
data["tags"] = self.tags
class PropertyAssignments():
""" holds are properties, that are assigned to an entity, eg. sample or experiment
"""
def __init__(self, openbis_obj, data):
self.openbis = openbis_obj
self.data = data
self.prop = {}
for pa in self.data['propertyAssignments']:
self.prop[pa['propertyType']['code'].lower()] = pa
def codes(self):
codes = []
for pa in self.data['propertyAssignments']:
codes.append(pa['propertyType']['code'].lower())
return codes
def _repr_html_(self):
html = """
<p>{}: <b>{}</b>
<p>description: {}</p>
<p>Code autogenerated: {}</p>
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th>property</th>
<th>label</th>
<th>description</th>
<th>dataTypeCode</th>
<th>mandatory</th>
</tr>
</thead>
<tbody>
""".format(
self.data['@type'].split('.')[-1],
self.data['code'],
self.data['description'],
self.data['autoGeneratedCode']
)
for pa in self.data['propertyAssignments']:
html += "<tr> <th>{}</th> <td>{}</td> <td>{}</td> <td>{}</td> <td>{}</td> </tr>".format(
pa['propertyType']['code'].lower(),
pa['propertyType']['label'],
pa['propertyType']['description'],
pa['propertyType']['dataTypeCode'],
pa['mandatory']
)
html += """
</tbody>
</table>
"""
return html