Newer
Older
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
pybis.py
"""
import os
from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
Swen Vermeul
committed
import time
Swen Vermeul
committed
import base64
from collections import namedtuple
from pybis.utils import parse_jackson, check_datatype, split_identifier, format_timestamp, is_identifier, is_permid
from pybis.property import PropertyHolder, PropertyAssignments
from pybis.masterdata import Vocabulary
import pandas as pd
from pandas import DataFrame, Series
Swen Vermeul
committed
import threading
from threading import Thread
from queue import Queue
DROPBOX_PLUGIN = "jupyter-uploader-api"
entities = {
"Sample": {
"attrs": "code permId identifier type attachments space experiment container components tags ".split(),
"ids2type": {
'parentIds': { 'permId': { '@type': 'as.dto.sample.id.SamplePermId' } },
'childIds': { 'permId': { '@type': 'as.dto.sample.id.SamplePermId' } },
'componentIds': { 'permId': {'@type': 'as.dto.sample.id.SamplePermId' } },
},
"identifier": "sampleId",
"cre_type": "as.dto.sample.create.SampleCreation",
"multi": "parents children components tags".split(),
},
"Space": {
"attrs": "code description registrator registrationDate modificationDate".split(),
"identifier": "spaceId",
},
"Experiment": {
"attrs": "code permId identifier type space project tags attachments".split(),
"multi": "tags".split(),
"identifier": "experimentId",
Swen Vermeul
committed
},
"Project": {
"attrs": "code description permId identifier space leader registrator registrationDate modifier modificationDate attachments".split(),
Swen Vermeul
committed
"multi": "tags".split(),
"identifier": "projectId",
},
"DataSet": {
"autoGeneratedCode" : True,
"attrs": "code permId type experiment sample parents children container components tags".split(),
"ids2type": {
'parentIds': { 'permId': { '@type': 'as.dto.dataset.id.DataSetPermId' } },
'childIds': { 'permId': { '@type': 'as.dto.dataset.id.DataSetPermId' } },
},
"multi": [],
"identifier": "dataSetId",
},
"attr2ids": {
"sample" : "sampleId",
"experiment" : "experimentId",
"space" : "spaceId",
"container" : "containerId",
"component" : "componentId",
"components" : "componentIds",
"parents" : "parentIds",
"children" : "childIds",
"tags" : "tagIds",
},
"ids2type": {
'spaceId': { 'permId': { '@type': 'as.dto.space.id.SpacePermId' } },
'projectId': { 'permId': { '@type': 'as.dto.project.id.ProjectPermId' } },
'experimentId': { 'permId': { '@type': 'as.dto.experiment.id.ExperimentPermId' } },
'tagIds': { 'code': { '@type': 'as.dto.tag.id.TagCode' } },
},
}
"space": "as.dto.space.search.SpaceSearchCriteria",
"project": "as.dto.project.search.ProjectSearchCriteria",
"experiment": "as.dto.experiment.search.ExperimentSearchCriteria",
"sample": "as.dto.sample.search.SampleSearchCriteria",
"dataset": "as.dto.dataset.search.DataSetSearchCriteria",
"code": "as.dto.common.search.CodeSearchCriteria",
"sample_type":"as.dto.sample.search.SampleTypeSearchCriteria",
"space": { "@type": "as.dto.space.fetchoptions.SpaceFetchOptions" },
"project": { "@type": "as.dto.project.fetchoptions.ProjectFetchOptions" },
"experiment": { "@type": "as.dto.experiment.fetchoptions.ExperimentFetchOptions" },
"sample": { "@type": "as.dto.sample.fetchoptions.SampleFetchOptions" },
Swen Vermeul
committed
"samples": { "@type": "as.dto.sample.fetchoptions.SampleFetchOptions" },
"dataSets": {
"@type": "as.dto.dataset.fetchoptions.DataSetFetchOptions",
"properties": { "@type": "as.dto.property.fetchoptions.PropertyFetchOptions" },
"type": { "@type": "as.dto.dataset.fetchoptions.DataSetTypeFetchOptions" },
},
"physicalData": { "@type": "as.dto.dataset.fetchoptions.PhysicalDataFetchOptions" },
"linkedData": { "@type": "as.dto.dataset.fetchoptions.LinkedDataFetchOptions" },
"properties": { "@type": "as.dto.property.fetchoptions.PropertyFetchOptions" },
Swen Vermeul
committed
"propertyAssignments" : {
"@type" : "as.dto.property.fetchoptions.PropertyAssignmentFetchOptions",
"propertyType": {
"@type": "as.dto.property.fetchoptions.PropertyTypeFetchOptions"
}
},
"tags": { "@type": "as.dto.tag.fetchoptions.TagFetchOptions" },
"registrator": { "@type": "as.dto.person.fetchoptions.PersonFetchOptions" },
"modifier": { "@type": "as.dto.person.fetchoptions.PersonFetchOptions" },
"leader": { "@type": "as.dto.person.fetchoptions.PersonFetchOptions" },
"attachments": { "@type": "as.dto.attachment.fetchoptions.AttachmentFetchOptions" },
Swen Vermeul
committed
"attachmentsWithContent": {
"@type": "as.dto.attachment.fetchoptions.AttachmentFetchOptions",
"content": {
"@type": "as.dto.common.fetchoptions.EmptyFetchOptions"
},
},
"history": { "@type": "as.dto.history.fetchoptions.HistoryEntryFetchOptions" },
"dataStore": { "@type": "as.dto.datastore.fetchoptions.DataStoreFetchOptions" },
def search_request_for_identifier(ident, entity):
search_request = {}
Swen Vermeul
committed
if is_identifier(ident):
search_request = {
"identifier": ident.upper(),
"@type": "as.dto.{}.id.{}Identifier".format(entity.lower(), entity.capitalize())
}
else:
search_request = {
"permId": ident,
"@type": "as.dto.{}.id.{}PermId".format(entity.lower(), entity.capitalize())
}
return search_request
if not isinstance(obj, dict):
return str(obj)
def extract_deletion(obj):
del_objs = []
for deleted_object in obj['deletedObjects']:
del_objs.append({
"reason": obj['reason'],
"permId": deleted_object["id"]["permId"],
"type": deleted_object["id"]["@type"]
})
return del_objs
def extract_identifier(ident):
if not isinstance(ident, dict):
return str(ident)
return ident['identifier']
def extract_nested_identifier(ident):
if not isinstance(ident, dict):
return str(ident)
return ident['identifier']['identifier']
def extract_permid(permid):
if not isinstance(permid, dict):
return str(permid)
return permid['permId']
def extract_nested_permid(permid):
if not isinstance(permid, dict):
return str(permid)
return permid['permId']['permId']
def extract_property_assignments(pas):
pa_strings = []
for pa in pas:
if not isinstance(pa['propertyType'], dict):
pa_strings.append(pa['propertyType'])
else:
pa_strings.append(pa['propertyType']['label'])
return pa_strings
def extract_person(person):
if 'email' in person and person['email'] is not '':
return "%s %s <%s>" % (person['firstName'], person['lastName'], person['email'])
else:
return "%s %s" % (person['firstName'], person['lastName'])
"""since Python3 the zlib module returns unsigned integers (2.7: signed int)
"""
prev = 0
for eachLine in open(fileName,"rb"):
prev = zlib.crc32(eachLine, prev)
# return as hex
return "%x"%(prev & 0xFFFFFFFF)
def _create_tagIds(tags=None):
if tags is None:
return None
tagIds = []
for tag in tags:
tagIds.append({ "code": tag, "@type": "as.dto.tag.id.TagCode" })
return tagIds
def _tagIds_for_tags(tags=None, action='Add'):
"""creates an action item to add or remove tags. Action is either 'Add', 'Remove' or 'Set'
"""
if tags is None:
return
if not isinstance(tags, list):
tags = [tags]
items = []
for tag in tags:
items.append({
"code": tag,
"@type": "as.dto.tag.id.TagCode"
})
tagIds = {
"actions": [
{
"items": items,
"@type": "as.dto.common.update.ListUpdateAction{}".format(action.capitalize())
}
],
"@type": "as.dto.common.update.IdListUpdateValue"
}
def _list_update(ids=None, entity=None, action='Add'):
"""creates an action item to add, set or remove ids.
"""
if ids is None:
return
if not isinstance(ids, list):
ids = [ids]
items = []
for ids in ids:
items.append({
"code": ids,
"@type": "as.dto.{}.id.{}Code".format(entity.lower(), entity)
})
list_update = {
"actions": [
{
"items": items,
"@type": "as.dto.common.update.ListUpdateAction{}".format(action.capitalize())
}
],
"@type": "as.dto.common.update.IdListUpdateValue"
}
return list_update
def _create_typeId(type):
return {
"permId": type.upper(),
"@type": "as.dto.entitytype.id.EntityTypePermId"
}
def _create_projectId(ident):
match = re.match('/', ident)
if match:
return {
"identifier": ident,
"@type": "as.dto.project.id.ProjectIdentifier"
}
else:
return {
"permId": ident,
"@type": "as.dto.project.id.ProjectPermId"
}
def _common_search(search_type, value, comparison="StringEqualToValue"):
sreq = {
"@type": search_type,
"fieldValue": {
"value": value,
"@type": "as.dto.common.search.{}".format(comparison)
}
}
return sreq
def _criteria_for_code(code):
return {
"fieldValue": {
"value": code.upper(),
"@type": "as.dto.common.search.StringEqualToValue"
},
"@type": "as.dto.common.search.CodeSearchCriteria"
}
def _subcriteria_for_type(code, entity):
return {
"@type": "as.dto.{}.search.{}TypeSearchCriteria".format(entity.lower(), entity),
"criteria": [
{
"@type": "as.dto.common.search.CodeSearchCriteria",
"fieldValue": {
"value": code.upper(),
"@type": "as.dto.common.search.StringEqualToValue"
}
}
]
}
sreq = {}
for key, val in req.items():
if key == "criteria":
items = []
for item in req['criteria']:
items.append(_gen_search_criteria(item))
sreq['criteria'] = items
elif key == "code":
sreq["criteria"] = [_common_search(
"as.dto.common.search.CodeSearchCriteria", val.upper()
)]
elif key == "permid":
sreq["criteria"] = [_common_search(
"as.dto.common.search.PermIdSearchCriteria", val
)]
elif key == "identifier":
sreq["criteria"] = []
if "space" in si:
sreq["criteria"].append(
_gen_search_criteria({ "space": "Space", "code": si["space"] })
)
if "experiment" in si:
pass
if "code" in si:
sreq["criteria"].append(
_common_search(
"as.dto.common.search.CodeSearchCriteria", si["code"].upper()
)
)
elif key == "operator":
sreq["operator"] = val.upper()
else:
sreq["@type"] = "as.dto.{}.search.{}SearchCriteria".format(key, val)
return sreq
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
def _subcriteria_for_tags(tags):
if not isinstance(tags, list):
tags = [tags]
criterias = []
for tag in tags:
criterias.append({
"fieldName": "code",
"fieldType": "ATTRIBUTE",
"fieldValue": {
"value": tag,
"@type": "as.dto.common.search.StringEqualToValue"
},
"@type": "as.dto.common.search.CodeSearchCriteria"
})
return {
"@type": "as.dto.tag.search.TagSearchCriteria",
"operator": "AND",
"criteria": criterias
}
def _subcriteria_for_is_finished(is_finished):
return {
"@type": "as.dto.common.search.StringPropertySearchCriteria",
"fieldName": "FINISHED_FLAG",
"fieldType": "PROPERTY",
"fieldValue": {
"value": is_finished,
"@type": "as.dto.common.search.StringEqualToValue"
}
}
def _subcriteria_for_properties(prop, val):
return {
"@type": "as.dto.common.search.StringPropertySearchCriteria",
"fieldName": prop.upper(),
"fieldType": "PROPERTY",
"fieldValue": {
"value": val,
"@type": "as.dto.common.search.StringEqualToValue"
}
}
def _subcriteria_for_permid(permids, entity, parents_or_children=''):
if not isinstance(permids, list):
permids = [permids]
criterias = []
for permid in permids:
criterias.append( {
"@type": "as.dto.common.search.PermIdSearchCriteria",
"fieldValue": {
"value": permid,
"@type": "as.dto.common.search.StringEqualToValue"
},
"fieldType": "ATTRIBUTE",
"fieldName": "code"
} )
criteria = {
"criteria": criterias,
"@type": "as.dto.{}.search.{}{}SearchCriteria".format(
entity.lower(), entity, parents_or_children
),
"operator": "OR"
}
return criteria
def _subcriteria_for_code(code, object_type):
if is_permid(code):
fieldname = "permId"
fieldtype = "as.dto.common.search.PermIdSearchCriteria"
else:
fieldname = "code"
fieldtype = "as.dto.common.search.CodeSearchCriteria"
"value": code.upper(),
"@type": "as.dto.common.search.StringEqualToValue"
"@type": search_criteria[object_type.lower()],
object_type.lower() : object_type.capitalize(),
"operator": "AND",
"code": code
})
class Openbis:
"""Interface for communicating with openBIS. A current version of openBIS is needed.
(minimum version 16.05).
Swen Vermeul
committed
def __init__(self, url='https://localhost:8443', verify_certificates=True, token=None):
"""Initialize a new connection to an openBIS server.
"""
url_obj = urlparse(url)
if url_obj.netloc is None:
raise ValueError("please provide the url in this format: https://openbis.host.ch:8443")
self.url_obj = url_obj
self.url = url_obj.geturl()
self.port = url_obj.port
self.hostname = url_obj.hostname
self.as_v3 = '/openbis/openbis/rmi-application-server-v3.json'
self.as_v1 = '/openbis/openbis/rmi-general-information-v1.json'
self.reg_v1 = '/openbis/openbis/rmi-query-v1.json'
Chandrasekhar Ramakrishnan
committed
self.verify_certificates = verify_certificates
Swen Vermeul
committed
self.token = token
self.dataset_types = None
self.sample_types = None
Swen Vermeul
committed
self.files_in_wsp = []
Swen Vermeul
committed
self.token_path = None
# use an existing token, if available
if self.token is None:
@property
def spaces(self):
return self.get_spaces()
@property
def projects(self):
return self.get_projects()
Swen Vermeul
committed
"""Read the token from the cache, and set the token ivar to it, if there, otherwise None.
If the token is not valid anymore, delete it.
"""
token_path = self.gen_token_path()
Chandrasekhar Ramakrishnan
committed
if not os.path.exists(token_path):
Swen Vermeul
committed
return None
Chandrasekhar Ramakrishnan
committed
try:
with open(token_path) as f:
Swen Vermeul
committed
token = f.read()
if not self.is_token_valid(token):
Chandrasekhar Ramakrishnan
committed
os.remove(token_path)
Swen Vermeul
committed
return None
else:
return token
except FileNotFoundError:
Swen Vermeul
committed
return None
Swen Vermeul
committed
def gen_token_path(self, parent_folder=None):
"""generates a path to the token file.
The token is usually saved in a file called
~/.pybis/hostname.token
"""
Chandrasekhar Ramakrishnan
committed
if parent_folder is None:
Swen Vermeul
committed
# save token under ~/.pybis folder
parent_folder = os.path.join(
os.path.expanduser("~"),
'.pybis'
)
path = os.path.join(parent_folder, self.hostname + '.token')
Chandrasekhar Ramakrishnan
committed
return path
Swen Vermeul
committed
def save_token(self, token=None, parent_folder=None):
""" saves the session token to the disk, usually here: ~/.pybis/hostname.token. When a new Openbis instance is created, it tries to read this saved token by default.
Swen Vermeul
committed
if token is None:
token = self.token
token_path = None;
if parent_folder is None:
token_path = self.gen_token_path()
else:
token_path = self.gen_token_path(parent_folder)
# create the necessary directories, if they don't exist yet
Chandrasekhar Ramakrishnan
committed
os.makedirs(os.path.dirname(token_path), exist_ok=True)
with open(token_path, 'w') as f:
Swen Vermeul
committed
f.write(token)
self.token_path = token_path
def delete_token(self, token_path=None):
Swen Vermeul
committed
if token_path is None:
token_path = self.token_path
os.remove(token_path)
def _post_request(self, resource, data):
""" internal method, used to handle all post requests and serializing / deserializing
data
"""
if "id" not in data:
data["id"] = "1"
if "jsonrpc" not in data:
data["jsonrpc"] = "2.0"
resp = requests.post(
self.url + resource,
json.dumps(data),
verify=self.verify_certificates
)
data = resp.json()
if 'error' in data:
raise ValueError('an error has occured: ' + data['error']['message'] )
elif 'result' in data:
return data['result']
else:
raise ValueError('request did not return either result nor error')
else:
raise ValueError('general error while performing post request')
""" Log out of openBIS. After logout, the session token is no longer valid.
if self.token is None:
return
logout_request = {
"method":"logout",
"params":[self.token],
}
resp = self._post_request(self.as_v3, logout_request)
Swen Vermeul
committed
self.token = None
self.token_path = None
Swen Vermeul
committed
def login(self, username=None, password=None, save_token=False):
"""Log into openBIS.
Expects a username and a password and updates the token (session-ID).
The token is then used for every request.
Chandrasekhar Ramakrishnan
committed
Clients may want to store the credentials object in a credentials store after successful login.
Throw a ValueError with the error message if login failed.
"""
login_request = {
"method":"login",
"params":[username, password],
}
result = self._post_request(self.as_v3, login_request)
if result is None:
raise ValueError("login to openBIS failed")
else:
self.token = result
Swen Vermeul
committed
if save_token:
self.save_token()
return self.token
def get_datastores(self):
""" Get a list of all available datastores. Usually there is only one, but in some cases
there might be more. If you upload a file, you need to specifiy the datastore you want
the file uploaded to.
"""
if len(self.datastores) == 0:
request = {
"method": "listDataStores",
"params": [ self.token ],
}
resp = self._post_request(self.as_v1, request)
if resp is not None:
self.datastores = DataFrame(resp)[['code','downloadUrl', 'hostUrl']]
return self.datastores
else:
raise ValueError("No datastore found!")
else:
return self.datastores
def get_spaces(self, code=None):
""" Get a list of all available spaces (DataFrame object). To create a sample or a
dataset, you need to specify in which space it should live.
"""
criteria = {}
options = {}
"params": [ self.token,
criteria,
options,
],
}
resp = self._post_request(self.as_v3, request)
if resp is not None:
spaces = DataFrame(resp['objects'])
spaces['registrationDate']= spaces['registrationDate'].map(format_timestamp)
spaces['modificationDate']= spaces['modificationDate'].map(format_timestamp)
sp = Things(
self,
'space',
spaces[['code', 'description', 'registrationDate', 'modificationDate']]
)
return sp
def get_space(self, spaceId):
""" Returns a Space object for a given identifier (spaceId).
"""
fetchopts = { "@type": "as.dto.space.fetchoptions.SpaceFetchOptions" }
for option in ['registrator']:
fetchopts[option] = fetch_option[option]
request = {
"method": "getSpaces",
"params": [
self.token,
[{
"permId": spaceId,
"@type": "as.dto.space.id.SpacePermId"
}],
resp = self._post_request(self.as_v3, request)
if len(resp) == 0:
raise ValueError("No such space: %s" % spaceId)
return Space(self, None, resp[spaceId])
Chandrasekhar Ramakrishnan
committed
def get_samples(self, code=None, permId=None, space=None, project=None, experiment=None, type=None,
withParents=None, withChildren=None, tags=None, **properties):
""" Get a list of all samples for a given space/project/experiment (or any combination)
"""
sub_criteria = []
if space:
sub_criteria.append(_gen_search_criteria({
"space": "Space",
"operator": "AND",
"code": space
})
)
exp_crit = _subcriteria_for_code(experiment, 'experiment')
proj_crit = _subcriteria_for_code(project, 'project')
exp_crit['criteria'] = []
exp_crit['criteria'].append(proj_crit)
sub_criteria.append(exp_crit)
sub_criteria.append(_subcriteria_for_code(experiment, 'experiment'))
if properties is not None:
for prop in properties:
sub_criteria.append(_subcriteria_for_properties(prop, properties[prop]))
if type:
sub_criteria.append(_subcriteria_for_code(type, 'sample_type'))
if tags:
sub_criteria.append(_subcriteria_for_tags(tags))
if code:
sub_criteria.append(_criteria_for_code(code))
if permId:
sub_criteria.append(_common_search("as.dto.common.search.PermIdSearchCriteria",permId))
if withParents:
if not isinstance(withParents, list):
withParents = [withParents]
for parent in withParents:
sub_criteria.append(
"sample": "SampleParents",
"identifier": parent
})
)
if withChildren:
if not isinstance(withChildren, list):
withChildren = [withChildren]
for child in withChildren:
sub_criteria.append(
"sample": "SampleChildren",
"identifier": child
})
)
criteria = {
"criteria": sub_criteria,
"@type": "as.dto.sample.search.SampleSearchCriteria",
"operator": "AND"
}
"properties": { "@type": "as.dto.property.fetchoptions.PropertyFetchOptions" },
"tags": { "@type": "as.dto.tag.fetchoptions.TagFetchOptions" },
"registrator": { "@type": "as.dto.person.fetchoptions.PersonFetchOptions" },
"modifier": { "@type": "as.dto.person.fetchoptions.PersonFetchOptions" },
"experiment": { "@type": "as.dto.experiment.fetchoptions.ExperimentFetchOptions" },
"type": { "@type": "as.dto.sample.fetchoptions.SampleTypeFetchOptions" },
"@type": "as.dto.sample.fetchoptions.SampleFetchOptions",
request = {
"method": "searchSamples",
"params": [ self.token,
criteria,
options,
],
}
resp = self._post_request(self.as_v3, request)
if resp is not None:
objects = resp['objects']
parse_jackson(objects)
samples = DataFrame(objects)
if len(samples) is 0:
raise ValueError("No samples found!")
samples['registrationDate']= samples['registrationDate'].map(format_timestamp)
samples['modificationDate']= samples['modificationDate'].map(format_timestamp)
samples['registrator'] = samples['registrator'].map(extract_person)
samples['modifier'] = samples['modifier'].map(extract_person)
samples['identifier'] = samples['identifier'].map(extract_identifier)
samples['permId'] = samples['permId'].map(extract_permid)
samples['experiment'] = samples['experiment'].map(extract_nested_identifier)
samples['sample_type'] = samples['type'].map(extract_nested_permid)
ss = samples[['identifier', 'permId', 'experiment', 'sample_type', 'registrator', 'registrationDate', 'modifier', 'modificationDate']]
return Things(self, 'sample', ss, 'identifier')
else:
raise ValueError("No samples found!")
def get_experiments(self, code=None, type=None, space=None, project=None, tags=None, is_finished=None, **properties):
""" Get a list of all experiment for a given space or project (or any combination)
"""
sub_criteria = []
if space:
sub_criteria.append(_subcriteria_for_code(space, 'space'))
sub_criteria.append(_subcriteria_for_code(project, 'project'))
sub_criteria.append(_criteria_for_code(code))
if type:
sub_criteria.append(_subcriteria_for_type(type, 'Experiment'))
if tags:
sub_criteria.append(_subcriteria_for_tags(tags))
if is_finished is not None:
sub_criteria.append(_subcriteria_for_is_finished(is_finished))
if properties is not None:
for prop in properties:
sub_criteria.append(_subcriteria_for_properties(prop, properties[prop]))
criteria = {
"criteria": sub_criteria,
"@type": "as.dto.experiment.search.ExperimentSearchCriteria",
"operator": "AND"
}
options = {
"properties": { "@type": "as.dto.property.fetchoptions.PropertyFetchOptions" },
"tags": { "@type": "as.dto.tag.fetchoptions.TagFetchOptions" },
"registrator": { "@type": "as.dto.person.fetchoptions.PersonFetchOptions" },
"modifier": { "@type": "as.dto.person.fetchoptions.PersonFetchOptions" },
"project": { "@type": "as.dto.project.fetchoptions.ProjectFetchOptions" },
"type": { "@type": "as.dto.experiment.fetchoptions.ExperimentTypeFetchOptions" },
"@type": "as.dto.experiment.fetchoptions.ExperimentFetchOptions"
}
request = {
"method": "searchExperiments",
"params": [ self.token,
criteria,
options,
],
}
resp = self._post_request(self.as_v3, request)
if len(resp['objects']) == 0:
raise ValueError("No experiments found!")
objects = resp['objects']
parse_jackson(objects)
experiments = DataFrame(objects)
experiments['registrationDate']= experiments['registrationDate'].map(format_timestamp)
experiments['modificationDate']= experiments['modificationDate'].map(format_timestamp)
experiments['project']= experiments['project'].map(extract_code)
experiments['registrator'] = experiments['registrator'].map(extract_person)
experiments['modifier'] = experiments['modifier'].map(extract_person)
experiments['identifier'] = experiments['identifier'].map(extract_identifier)
experiments['type'] = experiments['type'].map(extract_code)
exps = experiments[['code', 'identifier', 'project', 'type', 'registrator',
'registrationDate', 'modifier', 'modificationDate']]
return Things(self, 'experiment', exps, 'identifier')
Swen Vermeul
committed
def get_datasets(self,
code=None, type=None,
withParents=None, withChildren=None,
Swen Vermeul
committed
):
sub_criteria = []
if code:
sub_criteria.append(_criteria_for_code(code))
if type:
sub_criteria.append(_subcriteria_for_type(type, 'DataSet'))
if withParents:
sub_criteria.append(_subcriteria_for_permid(withParents, 'DataSet', 'Parents'))
if withChildren:
sub_criteria.append(_subcriteria_for_permid(withChildren, 'DataSet', 'Children'))
if sample:
sub_criteria.append(_subcriteria_for_code(sample, 'Sample'))
if experiment:
sub_criteria.append(_subcriteria_for_code(experiment, 'Experiment'))
Swen Vermeul
committed
criteria = {
"criteria": sub_criteria,
"@type": "as.dto.dataset.search.DataSetSearchCriteria",
"operator": "AND"
}
fetchopts = {
# "parents": { "@type": "as.dto.dataset.fetchoptions.DataSetFetchOptions" },
# "children": { "@type": "as.dto.dataset.fetchoptions.DataSetFetchOptions" },
"containers": { "@type": "as.dto.dataset.fetchoptions.DataSetFetchOptions" },
"type": { "@type": "as.dto.dataset.fetchoptions.DataSetTypeFetchOptions" }
}
for option in ['tags', 'properties', 'sample']:
fetchopts[option] = fetch_option[option]
request = {
"method": "searchDataSets",
"params": [ self.token,
criteria,
fetchopts,
],
}
resp = self._post_request(self.as_v3, request)
objects = resp['objects']
if len(objects) == 0:
raise ValueError("no datasets found!")
else:
parse_jackson(objects)
datasets = DataFrame(objects)
datasets['registrationDate']= datasets['registrationDate'].map(format_timestamp)
datasets['modificationDate']= datasets['modificationDate'].map(format_timestamp)
datasets['sample']= datasets['sample'].map(extract_nested_identifier)
datasets['type']= datasets['type'].map(extract_code)
ds = Things(
self,
'dataset',
datasets[['code', 'properties', 'type', 'sample', 'registrationDate', 'modificationDate']]
)
return ds
Swen Vermeul
committed
def get_experiment(self, expId, withAttachments=False):
""" Returns an experiment object for a given identifier (expId).
"""
fetchopts = {
Swen Vermeul
committed
"@type": "as.dto.experiment.fetchoptions.ExperimentFetchOptions",
"type": {
"@type": "as.dto.experiment.fetchoptions.ExperimentTypeFetchOptions",
},
search_request = search_request_for_identifier(expId, 'experiment')
Swen Vermeul
committed
for option in ['tags', 'properties', 'attachments', 'project', 'samples']:
fetchopts[option] = fetch_option[option]
Swen Vermeul
committed
if withAttachments:
fetchopts['attachments'] = fetch_option['attachmentsWithContent']
request = {
"method": "getExperiments",
"params": [
self.token,
[ search_request ],
fetchopts
],
}
resp = self._post_request(self.as_v3, request)
if len(resp) == 0:
raise ValueError("No such experiment: %s" % expId)
Swen Vermeul
committed
return Experiment(self,
self.get_experiment_type(resp[expId]["type"]["code"]),
resp[expId]
)
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
def new_experiment(self, project_ident, code, type, properties=None, attachments=None, tags=None):
tagIds = _create_tagIds(tags)
typeId = _create_typeId(type)
projectId = _create_projectId(project_ident)
if properties is None:
properties = {}
request = {
"method": "createExperiments",
"params": [
self.token,
[
{
"properties": properties,
"code": code,
"typeId" : typeId,
"projectId": projectId,
"tagIds": tagIds,
"attachments": attachments,
"@type": "as.dto.experiment.create.ExperimentCreation",
}
]
],
}
resp = self._post_request(self.as_v3, request)
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
return self.get_experiment(resp[0]['permId'])
def update_experiment(self, experimentId, properties=None, tagIds=None, attachments=None):
params = {
"experimentId": {
"permId": experimentId,
"@type": "as.dto.experiment.id.ExperimentPermId"
},
"@type": "as.dto.experiment.update.ExperimentUpdate"
}
if properties is not None:
params["properties"]= properties
if tagIds is not None:
params["tagIds"] = tagIds
if attachments is not None:
params["attachments"] = attachments
request = {
"method": "updateExperiments",
"params": [
self.token,
[ params ]
]
}
self._post_request(self.as_v3, request)
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
def create_sample(self, space_ident, code, type,
project_ident=None, experiment_ident=None, properties=None, attachments=None, tags=None):
tagIds = _create_tagIds(tags)
typeId = _create_typeId(type)
projectId = _create_projectId(project_ident)
experimentId = _create_experimentId(experiment_ident)
if properties is None:
properties = {}
request = {
"method": "createSamples",
"params": [
self.token,
[
{
"properties": properties,
"code": code,
"typeId" : typeId,
"projectId": projectId,
"experimentId": experimentId,
"tagIds": tagIds,
"attachments": attachments,
"@type": "as.dto.sample.create.SampleCreation",
}
]
],
}
resp = self._post_request(self.as_v3, request)
return self.get_sample(resp[0]['permId'])
def update_sample(self, sampleId, space=None, project=None, experiment=None,
parents=None, children=None, components=None, properties=None, tagIds=None, attachments=None):
params = {
"sampleId": {
"permId": sampleId,
"@type": "as.dto.sample.id.SamplePermId"
},
"@type": "as.dto.sample.update.SampleUpdate"
}
if space is not None:
params['spaceId'] = space
if project is not None:
params['projectId'] = project
if properties is not None:
params["properties"]= properties
if tagIds is not None:
params["tagIds"] = tagIds
if attachments is not None:
params["attachments"] = attachments
request = {
"method": "updateSamples",
"params": [
self.token,
[ params ]
]
}
self._post_request(self.as_v3, request)
def delete_entity(self, entity, permid, reason):
"""Deletes Spaces, Projects, Experiments, Samples and DataSets
"""
entity_type = "as.dto.{}.id.{}PermId".format(entity.lower(), entity.capitalize())
request = {
"method": "delete" + entity.capitalize() + 's',
"params": [
self.token,
[
{
"permId": permid,
"@type": entity_type
}
],
{
"reason": reason,
"@type": "as.dto.{}.delete.{}DeletionOptions".format(entity.lower(), entity.capitalize())
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
}
]
}
self._post_request(self.as_v3, request)
def get_deletions(self):
request = {
"method": "searchDeletions",
"params": [
self.token,
{},
{
"deletedObjects": {
"@type": "as.dto.deletion.fetchoptions.DeletedObjectFetchOptions"
}
}
]
}
resp = self._post_request(self.as_v3, request)
objects = resp['objects']
parse_jackson(objects)
new_objs = []
for value in objects:
del_objs = extract_deletion(value)
if len(del_objs) > 0:
new_objs.append(*del_objs)
return DataFrame(new_objs)
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
def new_project(self, space_code, code, description, leaderId):
request = {
"method": "createProjects",
"params": [
self.token,
[
{
"code": code,
"spaceId": {
"permId": space_code,
"@type": "as.dto.space.id.SpacePermId"
},
"@type": "as.dto.project.create.ProjectCreation",
"description": description,
"leaderId": leaderId,
"attachments": None
}
]
],
}
resp = self._post_request(self.as_v3, request)
return resp
def _gen_fetchoptions(self, options):
fo = {}
for option in options:
fo[option] = fetch_option[option]
return fo
def get_project(self, projectId):
options = ['space', 'registrator', 'attachments']
if is_identifier(projectId):
request = self._create_get_request(
'getProjects', 'project', projectId, options
)
resp = self._post_request(self.as_v3, request)
return Project(self, resp[projectId])
else:
search_criteria = _gen_search_criteria({
'project': 'Project',
'operator': 'AND',
'code': projectId
})
fo = self._gen_fetchoptions(options)
request = {
"method": "searchProjects",
"params": [self.token, search_criteria, fo]
}
resp = self._post_request(self.as_v3, request)
return Project(self, resp['objects'][0])
def get_projects(self, space=None):
""" Get a list of all available projects (DataFrame object).
"""
sub_criteria = []
if space:
sub_criteria.append(_subcriteria_for_code(space, 'space'))
criteria = {
"criteria": sub_criteria,
"@type": "as.dto.project.search.ProjectSearchCriteria",
"operator": "AND"
}
options = {
"registrator": { "@type": "as.dto.person.fetchoptions.PersonFetchOptions" },
"modifier": { "@type": "as.dto.person.fetchoptions.PersonFetchOptions" },
"experiments": { "@type": "as.dto.experiment.fetchoptions.ExperimentFetchOptions", },
"space": { "@type": "as.dto.space.fetchoptions.SpaceFetchOptions" },
"@type": "as.dto.project.fetchoptions.ProjectFetchOptions"
}
request = {
"method": "searchProjects",
"params": [ self.token,
criteria,
options,
],
}
resp = self._post_request(self.as_v3, request)
if resp is not None:
objects = resp['objects']
parse_jackson(objects)
projects = DataFrame(objects)
if len(projects) is 0:
raise ValueError("No projects found!")
projects['registrationDate']= projects['registrationDate'].map(format_timestamp)
projects['modificationDate']= projects['modificationDate'].map(format_timestamp)
projects['registrator'] = projects['registrator'].map(extract_person)
projects['modifier'] = projects['modifier'].map(extract_person)
projects['permid'] = projects['permId'].map(extract_permid)
projects['identifier'] = projects['identifier'].map(extract_identifier)
projects['space'] = projects['space'].map(extract_code)
pros=projects[['code', 'space', 'registrator', 'registrationDate',
'modifier', 'modificationDate', 'permid', 'identifier']]
return Things(self, 'project', pros, 'identifier')
else:
raise ValueError("No projects found!")
def _create_get_request(self, method_name, entity, permids, options):
if not isinstance(permids, list):
permids = [permids]
type = "as.dto.{}.id.{}".format(entity.lower(), entity.capitalize())
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
search_params = []
for permid in permids:
# decide if we got a permId or an identifier
match = re.match('/', permid)
if match:
search_params.append(
{ "identifier" : permid, "@type" : type + 'Identifier' }
)
else:
search_params.append(
{ "permId" : permid, "@type": type + 'PermId' }
)
fo = {}
for option in options:
fo[option] = fetch_option[option]
request = {
"method": method_name,
"params": [
self.token,
search_params,
fo
],
}
return request
def get_terms(self, vocabulary=None):
""" Returns information about vocabulary, including its controlled vocabulary
search_request = {}
if vocabulary is not None:
search_request = _gen_search_criteria( {
"vocabulary": "VocabularyTerm",
"criteria" : [{
"vocabulary": "Vocabulary",
"code": vocabulary
}]
})
fetch_options = {
"vocabulary" : { "@type" : "as.dto.vocabulary.fetchoptions.VocabularyFetchOptions" },
"@type": "as.dto.vocabulary.fetchoptions.VocabularyTermFetchOptions"
}
request = {
"method": "searchVocabularyTerms",
"params": [ self.token, search_request, fetch_options ]
}
resp = self._post_request(self.as_v3, request)
parse_jackson(resp)
return Vocabulary(resp)
def get_tags(self):
""" Returns a DataFrame of all
"""
request = {
"method": "searchTags",
"params": [ self.token, {}, {} ]
}
resp = self._post_request(self.as_v3, request)
parse_jackson(resp)
objects = DataFrame(resp['objects'])
objects['registrationDate'] = objects['registrationDate'].map(format_timestamp)
return objects[['code', 'registrationDate']]
Swen Vermeul
committed
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
def get_sample_types(self, type=None):
""" Returns a list of all available sample types
"""
return self._get_types_of(
"searchSampleTypes",
"Sample",
type,
["generatedCodePrefix"]
)
def get_sample_type(self, type):
try:
return self._get_types_of(
"searchSampleTypes",
"Sample",
type,
["generatedCodePrefix"]
)
except Exception:
raise ValueError("no such sample type: {}".format(type))
def get_experiment_types(self, type=None):
""" Returns a list of all available experiment types
"""
return self._get_types_of(
"searchExperimentTypes",
"Experiment",
type
)
def get_experiment_type(self, type):
try:
return self._get_types_of(
"searchExperimentTypes",
"Experiment",
type
)
except Exception:
raise ValueError("No such experiment type: {}".format(type))
def get_material_types(self, type=None):
""" Returns a list of all available material types
"""
return self._get_types_of("searchMaterialTypes", "Material", type)
def get_material_type(self, type):
try:
return self._get_types_of("searchMaterialTypes", "Material", type)
except Exception:
raise ValueError("No such material type: {}".format(type))
def get_dataset_types(self, type=None):
""" Returns a list (DataFrame object) of all currently available dataset types
"""
return self._get_types_of("searchDataSetTypes", "DataSet", type, ['kind'] )
Swen Vermeul
committed
def get_dataset_type(self, type):
try:
return self._get_types_of("searchDataSetTypes", "DataSet", type, ['kind'])
except Exception:
raise ValueError("No such dataSet type: {}".format(type))
def _get_types_of(self, method_name, entity, type_name=None, additional_attributes=[]):
""" Returns a list of all available types of an entity.
If the name of the entity-type is given, it returns a PropertyAssignments object
Swen Vermeul
committed
attributes = ['code', 'description', *additional_attributes, 'modificationDate']
search_request = {}
fetch_options = {}
if type_name is not None:
search_request = _gen_search_criteria({
"operator": "AND",
})
fetch_options = {
Swen Vermeul
committed
"@type": "as.dto.{}.fetchoptions.{}TypeFetchOptions".format(
Swen Vermeul
committed
)
}
Swen Vermeul
committed
fetch_options['propertyAssignments'] = fetch_option['propertyAssignments']
attributes.append('propertyAssignments')
"params": [ self.token, search_request, fetch_options ],
}
resp = self._post_request(self.as_v3, request)
parse_jackson(resp)
if type_name is not None and len(resp['objects']) == 1:
return PropertyAssignments(self, resp['objects'][0])
if len(resp['objects']) >= 1:
types = DataFrame(resp['objects'])
Swen Vermeul
committed
types['modificationDate'] = types['modificationDate'].map(format_timestamp)
return Things(self, entity.lower()+'_type', types[attributes])
Swen Vermeul
committed
Swen Vermeul
committed
def is_session_active(self):
""" checks whether a session is still active. Returns true or false.
"""
Swen Vermeul
committed
return self.is_token_valid(self.token)
def is_token_valid(self, token=None):
Chandrasekhar Ramakrishnan
committed
"""Check if the connection to openBIS is valid.
This method is useful to check if a token is still valid or if it has timed out,
requiring the user to login again.
Chandrasekhar Ramakrishnan
committed
:return: Return True if the token is valid, False if it is not valid.
"""
if token is None:
token = self.token
if token is None:
return False
request = {
"method": "isSessionActive",
"params": [ token ],
resp = self._post_request(self.as_v1, request)
"""fetch a dataset and some metadata attached to it:
- properties
- sample
- parents
- children
- containers
- dataStore
- physicalData
- linkedData
:return: a DataSet object
"""
criteria = [{
"permId": permid,
"@type": "as.dto.dataset.id.DataSetPermId"
}]
fetchopts = {
"parents": { "@type": "as.dto.dataset.fetchoptions.DataSetFetchOptions" },
"children": { "@type": "as.dto.dataset.fetchoptions.DataSetFetchOptions" },
"containers": { "@type": "as.dto.dataset.fetchoptions.DataSetFetchOptions" },
"type": { "@type": "as.dto.dataset.fetchoptions.DataSetTypeFetchOptions" },
}
for option in ['tags', 'properties', 'dataStore', 'physicalData', 'linkedData',
'experiment', 'sample']:
fetchopts[option] = fetch_option[option]
request = {
"params": [ self.token,
criteria,
fetchopts,
resp = self._post_request(self.as_v3, request)
if resp is None or len(resp) == 0:
raise ValueError('no such dataset found: '+permid)
if resp is not None:
for permid in resp:
#return resp[permid]
return DataSet(self, self.get_dataset_type(resp[permid]["type"]["code"]), resp[permid])
Chandrasekhar Ramakrishnan
committed
Swen Vermeul
committed
def get_sample(self, sample_ident, only_data=False, withAttachments=False):
Chandrasekhar Ramakrishnan
committed
"""Retrieve metadata for the sample.
Get metadata for the sample and any directly connected parents of the sample to allow access
to the same information visible in the ELN UI. The metadata will be on the file system.
:param sample_identifiers: A list of sample identifiers to retrieve.
"""
Swen Vermeul
committed
fetchopts = { "type": { "@type": "as.dto.sample.fetchoptions.SampleTypeFetchOptions" } }
search_request = search_request_for_identifier(sample_ident, 'sample')
Swen Vermeul
committed
for option in ['tags', 'properties', 'attachments', 'space', 'experiment', 'registrator', 'dataSets']:
fetchopts[option] = fetch_option[option]
if withAttachments:
fetchopts['attachments'] = fetch_option['attachmentsWithContent']
#fetchopts["parents"] = { "@type": "as.dto.sample.fetchoptions.SampleFetchOptions" }
#fetchopts["children"] = { "@type": "as.dto.sample.fetchoptions.SampleFetchOptions" }
sample_request = {
"method": "getSamples",
"params": [
self.token,
[ search_request ],
Swen Vermeul
committed
fetchopts
resp = self._post_request(self.as_v3, sample_request)
parse_jackson(resp)
if resp is None or len(resp) == 0:
raise ValueError('no such sample found: '+sample_ident)
else:
for sample_ident in resp:
if only_data:
return resp[sample_ident]
else:
return Sample(self, self.get_sample_type(resp[sample_ident]["type"]["code"]), resp[sample_ident])
def new_space(self, name, description=None):
""" Creates a new space in the openBIS instance. Returns a list of all spaces
"""
request = {
"method": "createSpaces",
"params": [
self.token,
[ {
"code": name,
"description": description,
"@type": "as.dto.space.create.SpaceCreation"
} ]
],
}
resp = self._post_request(self.as_v3, request)
return self.get_spaces(refresh=True)
Swen Vermeul
committed
def new_analysis(self, name, description=None, sample=None, dss_code=None, result_files=None,
Swen Vermeul
committed
notebook_files=None, parents=None):
Swen Vermeul
committed
""" An analysis contains the Jupyter notebook file(s) and some result files.
Technically this method involves uploading files to the session workspace
and activating the dropbox aka dataset ingestion service "jupyter-uploader-api"
Swen Vermeul
committed
"""
if dss_code is None:
dss_code = self.get_datastores()['code'][0]
# if a sample identifier was given, use it as a string.
# if a sample object was given, take its identifier
Swen Vermeul
committed
sampleId = None
Swen Vermeul
committed
if isinstance(sample, str):
Swen Vermeul
committed
if (is_identifier(sample)):
sampleId = {
"identifier": sample,
"@type": "as.dto.sample.id.SampleIdentifier"
}
else:
sampleId = {
"permId": sample,
"@type": "as.dto.sample.id.SamplePermId"
}
Swen Vermeul
committed
else:
Swen Vermeul
committed
sampleId = {
"identifier": sample.identifier,
"@type": "as.dto.sample.id.SampleIdentifier"
}
parentIds = []
if parents is not None:
if not isinstance(parents, list):
parants = [parents]
for parent in parents:
parentIds.append(parent.permId)
Swen Vermeul
committed
Swen Vermeul
committed
folder = time.strftime('%Y-%m-%d_%H-%M-%S')
Swen Vermeul
committed
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
data_sets = []
if notebook_files is not None:
notebooks_folder = os.path.join(folder, 'notebook_files')
self.upload_files(
datastore_url = datastore_url,
files=notebook_files,
folder= notebooks_folder,
wait_until_finished=True
)
data_sets.append({
"dataSetType" : "JUPYTER_NOTEBOOk",
"sessionWorkspaceFolder": notebooks_folder,
"fileNames" : notebook_files,
"properties" : {}
})
if result_files is not None:
results_folder = os.path.join(folder, 'result_files')
self.upload_files(
datastore_url = datastore_url,
files=result_files,
folder=results_folder,
wait_until_finished=True
)
data_sets.append({
"dataSetType" : "JUPYTER_RESULT",
"sessionWorkspaceFolder" : results_folder,
"fileNames" : result_files,
"properties" : {}
})
request = {
"method": "createReportFromAggregationService",
"params": [
self.token,
dss_code,
Swen Vermeul
committed
DROPBOX_PLUGIN,
Swen Vermeul
committed
"identifier" : sample.identifier
Swen Vermeul
committed
"sampleId": sampleId,
"parentIds": parentIds,
Swen Vermeul
committed
"containers" : [
{
"dataSetType" : "JUPYTER_CONTAINER",
"properties" : {
"NAME" : name,
"DESCRIPTION" : description
Swen Vermeul
committed
}
Swen Vermeul
committed
"dataSets" : data_sets,
resp = self._post_request(self.reg_v1, request)
try:
if resp['rows'][0][0]['value'] == 'OK':
return resp['rows'][0][1]['value']
except:
return resp
Swen Vermeul
committed
def new_sample(self, type, **kwargs):
""" Creates a new sample of a given sample type.
return Sample(self, self.get_sample_type(type), None, **kwargs)
def new_dataset(self, type, **kwargs):
""" Creates a new dataset of a given sample type.
"""
return DataSet(self, self.get_dataset_type(type.upper()), None, **kwargs)
def _get_dss_url(self, dss_code=None):
""" internal method to get the downloadURL of a datastore.
"""
Swen Vermeul
committed
dss = self.get_datastores()
if dss_code is None:
return dss['downloadUrl'][0]
else:
Swen Vermeul
committed
return dss[dss['code'] == dss_code]['downloadUrl'][0]
Swen Vermeul
committed
def upload_files(self, datastore_url=None, files=None, folder=None, wait_until_finished=False):
Swen Vermeul
committed
if datastore_url is None:
if files is None:
raise ValueError("Please provide a filename.")
Swen Vermeul
committed
if folder is None:
# create a unique foldername
folder = time.strftime('%Y-%m-%d_%H-%M-%S')
if isinstance(files, str):
files = [files]
self.files = files
self.startByte = 0
self.endByte = 0
# define a queue to handle the upload threads
queue = DataSetUploadQueue()
real_files = []
for filename in files:
if os.path.isdir(filename):
real_files.extend([os.path.join(dp, f) for dp, dn, fn in os.walk(os.path.expanduser(filename)) for f in fn])
else:
real_files.append(os.path.join(filename))
# compose the upload-URL and put URL and filename in the upload queue
for filename in real_files:
file_in_wsp = os.path.join(folder, filename)
Swen Vermeul
committed
self.files_in_wsp.append(file_in_wsp)
upload_url = (
Swen Vermeul
committed
datastore_url + '/session_workspace_file_upload'
+ '?filename=' + os.path.join(folder,filename)
+ '&id=1'
+ '&startByte=0&endByte=0'
+ '&sessionID=' + self.token
)
queue.put([upload_url, filename, self.verify_certificates])
# wait until all files have uploaded
if wait_until_finished:
queue.join()
# return files with full path in session workspace
Swen Vermeul
committed
return self.files_in_wsp
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
def __init__(self, workers=20):
# maximum files to be uploaded at once
self.upload_queue = Queue()
# define number of threads and start them
for t in range(workers):
t = Thread(target=self.upload_file)
t.daemon = True
t.start()
def put(self, things):
""" expects a list [url, filename] which is put into the upload queue
"""
self.upload_queue.put(things)
def join(self):
""" needs to be called if you want to wait for all uploads to be finished
"""
self.upload_queue.join()
def upload_file(self):
while True:
# get the next item in the queue
upload_url, filename, verify_certificates = self.upload_queue.get()
filesize = os.path.getsize(filename)
# upload the file to our DSS session workspace
with open(filename, 'rb') as f:
resp = requests.post(upload_url, data=f, verify=verify_certificates)
resp.raise_for_status()
data = resp.json()
assert filesize == int(data['size'])
# Tell the queue that we are done
self.upload_queue.task_done()
Swen Vermeul
committed
def __init__(self, workers=20):
# maximum files to be downloaded at once
self.download_queue = Queue()
# define number of threads
for t in range(workers):
t = Thread(target=self.download_file)
t.daemon = True
t.start()
def put(self, things):
""" expects a list [url, filename] which is put into the download queue
"""
self.download_queue.put(things)
Swen Vermeul
committed
def join(self):
""" needs to be called if you want to wait for all downloads to be finished
"""
self.download_queue.join()
def download_file(self):
while True:
url, filename, file_size, verify_certificates = self.download_queue.get()
Swen Vermeul
committed
# create the necessary directory structure if they don't exist yet
os.makedirs(os.path.dirname(filename), exist_ok=True)
# request the file in streaming mode
r = requests.get(url, stream=True, verify=verify_certificates)
Swen Vermeul
committed
with open(filename, 'wb') as f:
for chunk in r.iter_content(chunk_size=1024):
if chunk: # filter out keep-alive new chunks
f.write(chunk)
assert os.path.getsize(filename) == int(file_size)
Swen Vermeul
committed
self.download_queue.task_done()
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
def __init__(self, openbis_obj, type, data=None, **kwargs):
self.__dict__['openbis'] = openbis_obj
self.__dict__['type'] = type
self.__dict__['p'] = PropertyHolder(openbis_obj, type)
self.__dict__['a'] = AttrHolder(openbis_obj, 'DataSet', type)
# existing OpenBIS object
if data is not None:
self._set_data(data)
if kwargs is not None:
for key in kwargs:
setattr(self, key, kwargs[key])
def __eq__(self, other):
return str(self) == str(other)
def __ne__(self, other):
return str(self) != str(other)
def _set_data(self, data):
# assign the attribute data to self.a by calling it
# (invoking the AttrHolder.__call__ function)
self.a(data)
self.__dict__['data'] = data
# put the properties in the self.p namespace (without checking them)
for key, value in data['properties'].items():
self.p.__dict__[key.lower()] = value
def get_space(self):
try:
return self.openbis.get_space(self._space['code'])
except Exception:
pass
def get_project(self):
try:
return self.openbis.get_project(self._project['identifier'])
except Exception:
pass
def get_experiment(self):
try:
return self.openbis.get_experiment(self._experiment['identifier'])
except Exception:
pass
def get_sample(self):
try:
return self.openbis.get_sample(self._sample['permId']['permId'])
except Exception:
pass
def __getattr__(self, name):
return getattr(self.__dict__['a'], name)
def __setattr__(self, name, value):
if name in ['set_properties', 'set_tags', 'add_tags']:
raise ValueError("These are methods which should not be overwritten")
setattr(self.__dict__['a'], name, value)
def _repr_html_(self):
"""Print all the assigned attributes (identifier, tags, etc.) in a nicely formatted table. See
AttributeHolder class.
"""
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
html = self.a._repr_html_()
return html
class DataSet(OpenBisObject):
""" DataSet are openBIS objects that contain the actual files.
"""
def __init__(self, openbis_obj, type, data=None, **kwargs):
super(DataSet, self).__init__(openbis_obj, type, data, **kwargs)
# existing DataSet
if data is not None:
if data['physicalData'] is None:
self.__dict__['shareId'] = None
self.__dict__['location'] = None
else:
self.__dict__['shareId'] = data['physicalData']['shareId']
self.__dict__['location'] = data['physicalData']['location']
def __str__(self):
return self.data['code']
def __dir__(self):
return ['props', 'get_parents()', 'get_children()', 'get_sample()', 'get_experiment()', 'download()', 'file_list()', 'get_files()', 'space', 'project', 'experiment', 'project','tags', 'attachments', 'data']
@property
def type(self):
return self.__dict__['type']
@type.setter
def type(self, type_name):
dataset_type = self.openbis.get_dataset_type(type_name.upper())
self.p.__dict__['_type'] = dataset_type
self.a.__dict__['_type'] = dataset_type
def set_properties(self, properties):
self.openbis.update_dataset(self.permId, properties=properties)
Swen Vermeul
committed
def download(self, files=None, wait_until_finished=True, workers=10):
""" download the actual files and put them by default in the following folder:
__current_dir__/hostname/dataset_permId/
If no files are specified, all files of a given dataset are downloaded.
Files are usually downloaded in parallel, using 10 workers by default. If you want to wait until
all the files are downloaded, set the wait_until_finished option to True.
Swen Vermeul
committed
"""
if files == None:
files = self.file_list()
elif isinstance(files, str):
files = [files]
base_url = self.data['dataStore']['downloadUrl'] + '/datastore_server/' + self.permId + '/'
Swen Vermeul
committed
queue = DataSetDownloadQueue(workers=workers)
# get file list and start download
for filename in files:
file_info = self.get_file_list(start_folder=filename)
file_size = file_info[0]['fileSize']
download_url = base_url + filename + '?sessionID=' + self.openbis.token
filename = os.path.join(self.openbis.hostname, self.permId, filename)
queue.put([download_url, filename, file_size, self.openbis.verify_certificates])
Swen Vermeul
committed
# wait until all files have downloaded
if wait_until_finished:
queue.join()
print("Files downloaded to: %s" % os.path.join(self.openbis.hostname, self.permId))
def get_parents(self):
return self.openbis.get_datasets(withChildren=self.permId)
def get_children(self):
return self.openbis.get_datasets(withParents=self.permId)
def file_list(self):
"""returns the list of files including their directories as an array of strings. Just folders are not
listed.
"""
files = []
for file in self.get_file_list(recursive=True):
if file['isDirectory']:
pass
else:
files.append(file['pathInDataSet'])
return files
def get_files(self, start_folder='/'):
"""Returns a DataFrame of all files in this dataset
"""
def createRelativePath(pathInDataSet):
if self.shareId is None:
return ''
else:
return os.path.join(self.shareId, self.location, pathInDataSet)
def signed_to_unsigned(sig_int):
"""openBIS delivers crc32 checksums as signed integers.
If the number is negative, we just have to add 2**32
We display the hex number to match with the classic UI
"""
if sig_int < 0:
sig_int += 2**32
return "%x"%(sig_int & 0xFFFFFFFF)
files = self.get_file_list(start_folder=start_folder)
df = DataFrame(files)
df['relativePath'] = df['pathInDataSet'].map(createRelativePath)
df['crc32Checksum'] = df['crc32Checksum'].fillna(0.0).astype(int).map(signed_to_unsigned)
return df[['isDirectory', 'pathInDataSet', 'fileSize', 'crc32Checksum']]
def get_file_list(self, recursive=True, start_folder="/"):
"""Lists all files of a given dataset. You can specifiy a start_folder other than "/".
By default, all directories and their containing files are listed recursively. You can
turn off this option by setting recursive=False.
"""
request = {
"method" : "listFilesForDataSet",
"params" : [
self.openbis.token,
"id":"1"
self.data["dataStore"]["downloadUrl"] + '/datastore_server/rmi-dss-api-v1.json',
json.dumps(request),
verify=self.openbis.verify_certificates
)
data = resp.json()
if 'error' in data:
Swen Vermeul
committed
raise ValueError('Error from openBIS: ' + data['error'] )
elif 'result' in data:
return data['result']
Swen Vermeul
committed
raise ValueError('request to openBIS did not return either result nor error')
Swen Vermeul
committed
raise ValueError('internal error while performing post request')
class AttrHolder():
""" General class for both samples and experiments that hold all common attributes, such as:
- space
Swen Vermeul
committed
- experiment (sample)
- samples (experiment)
Swen Vermeul
committed
- parents (sample, dataset)
- children (sample, dataset)
- tags
"""
def __init__(self, openbis_obj, entity, type=None):
self.__dict__['_openbis'] = openbis_obj
self.__dict__['_entity'] = entity
if type is not None:
self.__dict__['_allowed_attrs'] = _definitions(entity)['attrs']
self.__dict__['_identifier'] = None
self.__dict__['_is_new'] = True
Swen Vermeul
committed
def __call__(self, data):
self.__dict__['_is_new'] = False
Swen Vermeul
committed
for attr in self._allowed_attrs:
if attr in ["code","permId","identifier","type",
"container","components","attachments"]:
self.__dict__['_'+attr] = data.get(attr, None)
Swen Vermeul
committed
d = data.get(attr, None)
if d is not None:
d = d['permId']
self.__dict__['_'+attr] = d
elif attr in ["sample", "experiment", "project"]:
Swen Vermeul
committed
d = data.get(attr, None)
if d is not None:
d = d['identifier']
self.__dict__['_'+attr] = d
elif attr in ["parents","children","samples"]:
Swen Vermeul
committed
self.__dict__['_'+attr] = []
for item in data[attr]:
if 'identifier' in item:
self.__dict__['_'+attr].append(item['identifier'])
elif 'permId' in item:
self.__dict__['_'+attr].append(item['permId'])
Swen Vermeul
committed
Swen Vermeul
committed
self.__dict__['_'+attr] = []
for item in data[attr]:
self.__dict__['_'+attr].append({
"code": item['code'],
"@type": "as.dto.tag.id.TagCode"
})
elif attr in ["attachments"]:
pass
else:
self.__dict__['_'+attr] = data.get(attr, None)
def _all_attrs(self):
attr2ids = _definitions('attr2ids')
ids2type = _definitions('ids2type')
request = {}
# look at all attributes available for that entity
Swen Vermeul
committed
for attr in self._allowed_attrs:
# these attributes cannot be changed (or not directly)
if attr in ["code", "permId", "identifier", "type", "attachments"]:
continue
if '_'+attr in self.__dict__:
if self._is_new:
# handle multivalue attributes (parents, children, tags etc.)
if attr in defs['multi']:
items = self.__dict__.get('_'+attr, [])
if items == None:
items = []
Swen Vermeul
committed
request[attr2ids[attr]] = items
Swen Vermeul
committed
request[attr2ids[attr]] = self.__dict__.get('_'+attr, None)
else:
# handle multivalue attributes (parents, children, tags etc.)
# we only cover the Set mechanism, which means we always update all items in a
# list
if attr in defs['multi']:
items = self.__dict__.get('_'+attr, [])
if items == None:
items = []
Swen Vermeul
committed
request[attr2ids[attr]] = {
"actions": [
{
"items": items,
"@type": "as.dto.common.update.ListUpdateActionSet",
}
],
"@type": "as.dto.common.update.IdListUpdateValue"
}
else:
# handle single attribut4es (space, experiment, project, container, etc.)
value = self.__dict__.get('_'+attr, {})
if value is None:
pass
else:
isModified=False
if 'isModified' in value:
isModified=True
del value['isModified']
Swen Vermeul
committed
request[attr2ids[attr]] = {
"@type": "as.dto.common.update.FieldUpdateValue",
"isModified": isModified,
"value": value,
}
if self.__dict__.get('_code', None) is None:
request['autoGeneratedCode'] = True
else:
pass
return request
def __getattr__(self, name):
""" handles all attribute requests dynamically. Values are returned in a sensible way,
for example the identifiers of parents, children and components are returned
as an array of values.
"""
int_name = '_'+name
Loading
Loading full blame...