experiments = DataFrame(objects)
experiments['registrationDate'] = experiments['registrationDate'].map(format_timestamp)
experiments['modificationDate'] = experiments['modificationDate'].map(format_timestamp)
experiments['project'] = experiments['project'].map(extract_code)
experiments['registrator'] = experiments['registrator'].map(extract_person)
experiments['modifier'] = experiments['modifier'].map(extract_person)
experiments['identifier'] = experiments['identifier'].map(extract_identifier)
experiments['type'] = experiments['type'].map(extract_code)
exps = experiments[['code', 'identifier', 'project', 'type', 'registrator',
'registrationDate', 'modifier', 'modificationDate']]
return Things(self, 'experiment', exps, 'identifier')
def get_datasets(self, code=None, type=None, withParents=None, withChildren=None, withSamples=None):
sub_criteria = []
if code:
sub_criteria.append(_criteria_for_code(code))
if type:
sub_criteria.append(_subcriteria_for_type(type, 'DataSet'))
if withParents:
sub_criteria.append(_subcriteria_for_permid(withParents, 'DataSet', 'Parents'))
if withChildren:
sub_criteria.append(_subcriteria_for_permid(withChildren, 'DataSet', 'Children'))
if withSamples:
sub_criteria.append(_subcriteria_for_permid(withSamples, 'Sample'))
criteria = {
"criteria": sub_criteria,
"@type": "as.dto.dataset.search.DataSetSearchCriteria",
"operator": "AND"
}
fetchopts = {
# "parents": { "@type": "as.dto.dataset.fetchoptions.DataSetFetchOptions" },
# "children": { "@type": "as.dto.dataset.fetchoptions.DataSetFetchOptions" },
"containers": { "@type": "as.dto.dataset.fetchoptions.DataSetFetchOptions" },
"type": { "@type": "as.dto.dataset.fetchoptions.DataSetTypeFetchOptions" }
}
for option in ['tags', 'properties', 'sample']:
fetchopts[option] = fetch_option[option]
request = {
"method": "searchDataSets",
"params": [ self.token,
criteria,
fetchopts,
],
}
resp = self._post_request(self.as_v3, request)
objects = resp['objects']
if len(objects) == 0:
raise ValueError("no datasets found!")
else:
parse_jackson(objects)
datasets = DataFrame(objects)
datasets['registrationDate']= datasets['registrationDate'].map(format_timestamp)
datasets['modificationDate']= datasets['modificationDate'].map(format_timestamp)
datasets['sample']= datasets['sample'].map(extract_nested_identifier)
datasets['type']= datasets['type'].map(extract_code)
ds = Things(
self,
'dataset',
datasets[['code', 'properties', 'type', 'sample', 'registrationDate', 'modificationDate']]
)
return ds
def get_experiment(self, expId):
""" Returns an experiment object for a given identifier (expId).
"""
fetchopts = {
"@type": "as.dto.experiment.fetchoptions.ExperimentFetchOptions"
}
search_request = search_request_for_identifier(expId, 'experiment')
for option in ['tags', 'properties', 'attachments', 'project']:
fetchopts[option] = fetch_option[option]
request = {
"method": "getExperiments",
"params": [
self.token,
[ search_request ],
fetchopts
],
}
resp = self._post_request(self.as_v3, request)
if len(resp) == 0:
raise ValueError("No such experiment: %s" % expId)
return Experiment(self, resp[expId])
def new_experiment(self, project_ident, code, type, properties=None, attachments=None, tags=None):
tagIds = _create_tagIds(tags)
typeId = _create_typeId(type)
projectId = _create_projectId(project_ident)
if properties is None:
properties = {}
request = {
"method": "createExperiments",
"params": [
self.token,
[
{
"properties": properties,
"code": code,
"typeId" : typeId,
"projectId": projectId,
"tagIds": tagIds,
"attachments": attachments,
"@type": "as.dto.experiment.create.ExperimentCreation",
}
]
],
}
resp = self._post_request(self.as_v3, request)
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
return self.get_experiment(resp[0]['permId'])
def update_experiment(self, experimentId, properties=None, tagIds=None, attachments=None):
params = {
"experimentId": {
"permId": experimentId,
"@type": "as.dto.experiment.id.ExperimentPermId"
},
"@type": "as.dto.experiment.update.ExperimentUpdate"
}
if properties is not None:
params["properties"]= properties
if tagIds is not None:
params["tagIds"] = tagIds
if attachments is not None:
params["attachments"] = attachments
request = {
"method": "updateExperiments",
"params": [
self.token,
[ params ]
]
}
self._post_request(self.as_v3, request)
def create_sample(self, space_ident, code, type,
project_ident=None, experiment_ident=None, properties=None, attachments=None, tags=None):
tagIds = _create_tagIds(tags)
typeId = _create_typeId(type)
projectId = _create_projectId(project_ident)
experimentId = _create_experimentId(experiment_ident)
if properties is None:
properties = {}
request = {
"method": "createSamples",
"params": [
self.token,
[
{
"properties": properties,
"code": code,
"typeId" : typeId,
"projectId": projectId,
"experimentId": experimentId,
"tagIds": tagIds,
"attachments": attachments,
"@type": "as.dto.sample.create.SampleCreation",
}
]
],
}
resp = self._post_request(self.as_v3, request)
return self.get_sample(resp[0]['permId'])
def update_sample(self, sampleId, space=None, project=None, experiment=None,
parents=None, children=None, components=None, properties=None, tagIds=None, attachments=None):
params = {
"sampleId": {
"permId": sampleId,
"@type": "as.dto.sample.id.SamplePermId"
},
"@type": "as.dto.sample.update.SampleUpdate"
}
if space is not None:
params['spaceId'] = space
if project is not None:
params['projectId'] = project
if properties is not None:
params["properties"]= properties
if tagIds is not None:
params["tagIds"] = tagIds
if attachments is not None:
params["attachments"] = attachments
request = {
"method": "updateSamples",
"params": [
self.token,
[ params ]
]
}
self._post_request(self.as_v3, request)
def delete_entity(self, what, permid, reason):
"""Deletes Spaces, Projects, Experiments, Samples and DataSets
"""
entity_type = "as.dto.{}.id.{}PermId".format(what.lower(), what.capitalize())
request = {
"method": "delete" + what.capitalize() + 's',
"params": [
self.token,
[
{
"permId": permid,
"@type": entity_type
}
],
{
"reason": reason,
"@type": "as.dto.{}.delete.{}DeletionOptions".format(what.lower(), what.capitalize())
}
]
}
self._post_request(self.as_v3, request)
def get_deletions(self):
request = {
"method": "searchDeletions",
"params": [
self.token,
{},
{
"deletedObjects": {
"@type": "as.dto.deletion.fetchoptions.DeletedObjectFetchOptions"
}
}
]
}
resp = self._post_request(self.as_v3, request)
objects = resp['objects']
parse_jackson(objects)
new_objs = []
for value in objects:
del_objs = extract_deletion(value)
if len(del_objs) > 0:
new_objs.append(*del_objs)
return DataFrame(new_objs)
def new_project(self, space_code, code, description, leaderId):
request = {
"method": "createProjects",
"params": [
self.token,
[
{
"code": code,
"spaceId": {
"permId": space_code,
"@type": "as.dto.space.id.SpacePermId"
},
"@type": "as.dto.project.create.ProjectCreation",
"description": description,
"leaderId": leaderId,
"attachments": None
}
]
],
}
resp = self._post_request(self.as_v3, request)
return resp
def get_project(self, projectId):
request = self._create_get_request('getProjects', 'project', projectId, ['attachments'])
resp = self._post_request(self.as_v3, request)
return resp
def get_projects(self, space=None):
""" Get a list of all available projects (DataFrame object).
"""
if space is None:
space = self.default_space
sub_criteria = []
if space:
sub_criteria.append(_subcriteria_for_code(space, 'space'))
criteria = {
"criteria": sub_criteria,
"@type": "as.dto.project.search.ProjectSearchCriteria",
"operator": "AND"
}
options = {
"registrator": { "@type": "as.dto.person.fetchoptions.PersonFetchOptions" },
"modifier": { "@type": "as.dto.person.fetchoptions.PersonFetchOptions" },
"experiments": { "@type": "as.dto.experiment.fetchoptions.ExperimentFetchOptions", },
"space": { "@type": "as.dto.space.fetchoptions.SpaceFetchOptions" },
"@type": "as.dto.project.fetchoptions.ProjectFetchOptions"
}
request = {
"method": "searchProjects",
"params": [ self.token,
criteria,
options,
],
}
resp = self._post_request(self.as_v3, request)
if resp is not None:
objects = resp['objects']
parse_jackson(objects)
projects = DataFrame(objects)
if len(projects) == 0:
raise ValueError("No projects found!")
projects['registrationDate'] = projects['registrationDate'].map(format_timestamp)
projects['modificationDate'] = projects['modificationDate'].map(format_timestamp)
projects['registrator'] = projects['registrator'].map(extract_person)
projects['modifier'] = projects['modifier'].map(extract_person)
projects['permid'] = projects['permId'].map(extract_permid)
projects['identifier'] = projects['identifier'].map(extract_identifier)
projects['space'] = projects['space'].map(extract_code)
pros = projects[['code', 'space', 'registrator', 'registrationDate',
'modifier', 'modificationDate', 'permid', 'identifier']]
return Things(self, 'project', pros, 'identifier')
else:
raise ValueError("No projects found!")
def _create_get_request(self, method_name, entity_type, permids, options):
if not isinstance(permids, list):
permids = [permids]
type = "as.dto.{}.id.{}".format(entity_type.lower(), entity_type.capitalize())
search_params = []
for permid in permids:
# decide if we got a permId or an identifier
match = re.match('/', permid)
if match:
search_params.append(
{ "identifier" : permid, "@type" : type + 'Identifier' }
)
else:
search_params.append(
{ "permId" : permid, "@type": type + 'PermId' }
)
fo = {}
for option in options:
fo[option] = fetch_option[option]
request = {
"method": method_name,
"params": [
self.token,
search_params,
fo
],
}
return request
def get_sample_types(self, type=None):
""" Returns a list of all available sample types
return self._get_types_of("searchSampleTypes", "Sample", type, ["generatedCodePrefix"])
def get_sample_type(self, type):
return self._get_types_of("searchSampleTypes", "Sample", type, ["generatedCodePrefix"])
def get_experiment_types(self, type=None):
""" Returns a list of all available experiment types
"""
return self._get_types_of("searchExperimentTypes", "Experiment", type)
def get_material_types(self, type=None):
""" Returns a list of all available material types
"""
return self._get_types_of("searchMaterialTypes", "Material", type)
def get_dataset_types(self, type=None):
""" Returns a list (DataFrame object) of all currently available dataset types
"""
return self._get_types_of("searchDataSetTypes", "DataSet", type)
def get_terms(self, vocabulary=None):
""" Returns information about vocabulary, including its controlled vocabulary
search_request = {}
if vocabulary is not None:
search_request = _gen_search_request( {
"vocabulary": "VocabularyTerm",
"criteria" : [{
"vocabulary": "Vocabulary",
"code": vocabulary
}]
})
fetch_options = {
"vocabulary" : { "@type" : "as.dto.vocabulary.fetchoptions.VocabularyFetchOptions" },
"@type": "as.dto.vocabulary.fetchoptions.VocabularyTermFetchOptions"
}
request = {
"method": "searchVocabularyTerms",
"params": [ self.token, search_request, fetch_options ]
}
resp = self._post_request(self.as_v3, request)
parse_jackson(resp)
return Vocabulary(resp)
def get_tags(self):
""" Returns a DataFrame of all
"""
request = {
"method": "searchTags",
"params": [ self.token, {}, {} ]
}
resp = self._post_request(self.as_v3, request)
parse_jackson(resp)
objects = DataFrame(resp['objects'])
objects['registrationDate'] = objects['registrationDate'].map(format_timestamp)
return objects[['code', 'registrationDate']]
def _get_types_of(self, method_name, entity_type, type=None, additional_attributes=None):
""" Returns a list of all available types of an entity (Experiment, Sample, DataSet, Material)
"""
if additional_attributes is None:
additional_attributes = []
attributes = ['code', 'description', *additional_attributes]
search_request = {}
fetch_options = {}
if type is not None:
search_request = _gen_search_request({
entity_type.lower(): entity_type.capitalize() + "Type",
"operator": "AND",
"code": type
})
fetch_options = {
"propertyAssignments" : {
"@type" : "as.dto.property.fetchoptions.PropertyAssignmentFetchOptions"
},
"@type": "as.dto.{}.fetchoptions.{}TypeFetchOptions".format(entity_type.lower(), entity_type)
}
attributes.append('propertyAssignments')
"params": [ self.token, search_request, fetch_options ],
}
resp = self._post_request(self.as_v3, request)
parse_jackson(resp)
if type is not None and len(resp['objects']) == 1:
return PropertyAssignments(self, resp['objects'][0])
if len(resp['objects']) >= 1:
types = DataFrame(resp['objects'])
return types[attributes]
def is_session_active(self):
""" checks whether a session is still active. Returns true or false.
"""
return self.is_token_valid(self.token)
def is_token_valid(self, token=None):
"""Check if the connection to openBIS is valid.
This method is useful to check if a token is still valid or if it has timed out,
requiring the user to login again.
:return: Return True if the token is valid, False if it is not valid.
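Usage sketch (assuming an Openbis session object `o`; credentials are illustrative):
    if not o.is_token_valid():
        o.login('username', 'password')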
"""
if token is None:
token = self.token
if token is None:
return False
request = {
"method": "isSessionActive",
"params": [ token ],
}
resp = self._post_request(self.as_v1, request)
return resp
"""fetch a dataset and some metadata attached to it:
- properties
- sample
- parents
- children
- containers
- dataStore
- physicalData
- linkedData
:return: a DataSet object
"""
criteria = [{
"permId": permid,
"@type": "as.dto.dataset.id.DataSetPermId"
}]
fetchopts = {
"parents": { "@type": "as.dto.dataset.fetchoptions.DataSetFetchOptions" },
"children": { "@type": "as.dto.dataset.fetchoptions.DataSetFetchOptions" },
"containers": { "@type": "as.dto.dataset.fetchoptions.DataSetFetchOptions" },
"@type": "as.dto.dataset.fetchoptions.DataSetFetchOptions",
}
for option in ['tags', 'properties', 'dataStore', 'physicalData', 'linkedData',
'experiment', 'sample']:
fetchopts[option] = fetch_option[option]
request = {
"method": "getDataSets",
"params": [ self.token,
criteria,
fetchopts,
],
}
resp = self._post_request(self.as_v3, request)
if resp is not None:
for permid in resp:
return DataSet(self, resp[permid])
def get_sample(self, sample_ident, only_data=False):
"""Retrieve metadata for the sample.
Get metadata for the sample and any directly connected parents of the sample to allow access
to the same information visible in the ELN UI. The metadata will be on the file system.
:param sample_ident: a sample identifier or permId to retrieve
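Usage sketch (assuming an Openbis session object `o`; the identifier is illustrative):
    sample = o.get_sample('/MY_SPACE/MY_SAMPLE')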
"""
search_request = search_request_for_identifier(sample_ident, 'sample')
"type": {
"@type": "as.dto.sample.fetchoptions.SampleTypeFetchOptions"
},
"parents": { "@type": "as.dto.sample.fetchoptions.SampleFetchOptions" },
"children": { "@type": "as.dto.sample.fetchoptions.SampleFetchOptions" },
"experiment": { "@type": "as.dto.experiment.fetchoptions.ExperimentFetchOptions" },
"dataSets": {
"@type": "as.dto.dataset.fetchoptions.DataSetFetchOptions",
"properties": {
"@type": "as.dto.property.fetchoptions.PropertyFetchOptions"
},
"type": {
"@type": "as.dto.dataset.fetchoptions.DataSetTypeFetchOptions"
},
},
"space": fetch_option['space'],
"properties": fetch_option['properties'],
"registrator": fetch_option['registrator'],
"tags": fetch_option['tags'],
}
sample_request = {
"method": "getSamples",
"params": [
self.token,
[ search_request ],
fetchopts
],
}
resp = self._post_request(self.as_v3, sample_request)
parse_jackson(resp)
if resp is None or len(resp) == 0:
raise ValueError('no such sample found: '+sample_ident)
else:
for sample_ident in resp:
if only_data:
return resp[sample_ident]
else:
return Sample(self, self.get_sample_type(resp[sample_ident]["type"]["code"]), resp[sample_ident])
def new_space(self, name, description=None):
""" Creates a new space in the openBIS instance. Returns a list of all spaces
"""
request = {
"method": "createSpaces",
"params": [
self.token,
[ {
"@id": 0,
"code": name,
"description": description,
"@type": "as.dto.space.create.SpaceCreation"
} ]
],
}
resp = self._post_request(self.as_v3, request)
return self.get_spaces(refresh=True)
def new_analysis(self, name, description=None, sample=None, dss_code=None, result_files=None,
notebook_files=None, parents=[]):
""" An analysis contains the Jupyter notebook file(s) and some result files.
Technically this method involves uploading files to the session workspace
and activating the dropbox aka dataset ingestion service "jupyter-uploader-api"
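A hedged usage sketch (sample identifier and file names are illustrative):
    permid = o.new_analysis(name='my analysis',
        description='analysis of my experiment',
        sample='/MY_SPACE/MY_SAMPLE',
        notebook_files=['analysis.ipynb'],
        result_files=['results.csv'])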
"""
if dss_code is None:
dss_code = self.get_datastores()['code'][0]
# if a sample identifier was given, use it as a string.
# if a sample object was given, take its identifier
# TODO: handle permId's
sample_identifier = None
if isinstance(sample, str):
sample_identifier = sample
else:
sample_identifier = sample.ident
folder = time.strftime('%Y-%m-%d_%H-%M-%S')
# get the download URL of the datastore to upload the files to the session workspace
datastore_url = self._get_dss_url(dss_code)
data_sets = []
if notebook_files is not None:
notebooks_folder = os.path.join(folder, 'notebook_files')
self.upload_files(
datastore_url = datastore_url,
files=notebook_files,
folder= notebooks_folder,
wait_until_finished=True
)
data_sets.append({
"dataSetType" : "JUPYTER_NOTEBOOk",
"sessionWorkspaceFolder": notebooks_folder,
"fileNames" : notebook_files,
"properties" : {}
})
if result_files is not None:
results_folder = os.path.join(folder, 'result_files')
self.upload_files(
datastore_url = datastore_url,
files=result_files,
folder=results_folder,
wait_until_finished=True
)
data_sets.append({
"dataSetType" : "JUPYTER_RESULT",
"sessionWorkspaceFolder" : results_folder,
"fileNames" : result_files,
"properties" : {}
})
request = {
"method": "createReportFromAggregationService",
"params": [
self.token,
dss_code,
DROPBOX_PLUGIN,
{
"sample" : { "identifier" : sample_identifier },
"containers" : [
{
"dataSetType" : "JUPYTER_CONTAINER",
"properties" : {
"NAME" : name,
"DESCRIPTION" : description
}
}
],
"dataSets" : data_sets,
"parents" : parents,
}
],
}
resp = self._post_request(self.reg_v1, request)
try:
if resp['rows'][0][0]['value'] == 'OK':
return resp['rows'][0][1]['value']
except:
return resp
def new_sample(self, type, **kwargs):
""" Creates a new sample of a given sample type.
return Sample(self, self.get_sample_type(type), None, **kwargs)
def _get_dss_url(self, dss_code=None):
""" internal method to get the downloadURL of a datastore.
"""
dss = self.get_datastores()
if dss_code is None:
return dss['downloadUrl'][0]
else:
return dss[dss['code'] == dss_code]['downloadUrl'][0]
def upload_files(self, datastore_url=None, files=None, folder=None, wait_until_finished=False):
if datastore_url is None:
datastore_url = self._get_dss_url()
if files is None:
raise ValueError("Please provide a filename.")
if folder is None:
# create a unique foldername
folder = time.strftime('%Y-%m-%d_%H-%M-%S')
if isinstance(files, str):
files = [files]
self.files = files
self.startByte = 0
self.endByte = 0
# define a queue to handle the upload threads
queue = DataSetUploadQueue()
real_files = []
for filename in files:
if os.path.isdir(filename):
real_files.extend([os.path.join(dp, f) for dp, dn, fn in os.walk(os.path.expanduser(filename)) for f in fn])
else:
real_files.append(os.path.join(filename))
# compose the upload-URL and put URL and filename in the upload queue
self.files_in_wsp = []
for filename in real_files:
file_in_wsp = os.path.join(folder, filename)
self.files_in_wsp.append(file_in_wsp)
upload_url = (
datastore_url + '/session_workspace_file_upload'
+ '?filename=' + os.path.join(folder,filename)
+ '&id=1'
+ '&startByte=0&endByte=0'
+ '&sessionID=' + self.token
)
queue.put([upload_url, filename, self.verify_certificates])
# wait until all files have uploaded
if wait_until_finished:
queue.join()
# return files with full path in session workspace
return self.files_in_wsp
class DataSetUploadQueue:
def __init__(self, workers=20):
# maximum files to be uploaded at once
self.upload_queue = Queue()
# define number of threads and start them
for _ in range(workers):
t = Thread(target=self.upload_file)
t.daemon = True
t.start()
def put(self, things):
""" expects a list [url, filename] which is put into the upload queue
"""
self.upload_queue.put(things)
def join(self):
""" needs to be called if you want to wait for all uploads to be finished
"""
self.upload_queue.join()
def upload_file(self):
while True:
# get the next item in the queue
upload_url, filename, verify_certificates = self.upload_queue.get()
filesize = os.path.getsize(filename)
# upload the file to our DSS session workspace
with open(filename, 'rb') as f:
resp = requests.post(upload_url, data=f, verify=verify_certificates)
resp.raise_for_status()
data = resp.json()
assert filesize == int(data['size'])
# Tell the queue that we are done
self.upload_queue.task_done()
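# A hedged usage sketch of the upload queue (URL and filename are illustrative;
# each queued item is [upload_url, filename, verify_certificates]):
#
#   queue = DataSetUploadQueue(workers=4)
#   queue.put(['https://dss.example.org/session_workspace_file_upload', 'data.csv', True])
#   queue.join()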
class DataSetDownloadQueue:
def __init__(self, workers=20):
# maximum files to be downloaded at once
self.download_queue = Queue()
# define number of threads
for _ in range(workers):
t = Thread(target=self.download_file)
t.daemon = True
t.start()
def put(self, things):
""" expects a list [url, filename] which is put into the download queue
"""
self.download_queue.put(things)
def join(self):
""" needs to be called if you want to wait for all downloads to be finished
"""
self.download_queue.join()
def download_file(self):
while True:
url, filename, file_size, verify_certificates = self.download_queue.get()
# create the necessary directory structure if they don't exist yet
os.makedirs(os.path.dirname(filename), exist_ok=True)
# request the file in streaming mode
r = requests.get(url, stream=True, verify=verify_certificates)
with open(filename, 'wb') as f:
for chunk in r.iter_content(chunk_size=1024):
if chunk: # filter out keep-alive new chunks
f.write(chunk)
assert os.path.getsize(filename) == int(file_size)
self.download_queue.task_done()
class DataSet():
""" DataSet are openBIS objects that contain the actual files.
"""
def __init__(self, openbis_obj, data):
self.openbis = openbis_obj
self.data = data
self.permid = data["code"]
self.permId = data["code"]
if data['physicalData'] is None:
self.shareId = None
self.location = None
else:
self.shareId = data['physicalData']['shareId']
self.location = data['physicalData']['location']
def _repr_html_(self):
html = """
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th>attribute</th>
<th>value</th>
</tr>
</thead>
<tbody>
<tr> <th>permId</th> <td>{}</td> </tr>
<tr> <th>properties</th> <td>{}</td> </tr>
<tr> <th>tags</th> <td>{}</td> </tr>
</tbody>
</table>
"""
return html.format(self.permid, self.data['properties'], self.data['tags'])
def download(self, files=None, wait_until_finished=True, workers=10):
""" download the actual files and put them by default in the following folder:
__current_dir__/hostname/dataset_permid/
If no files are specified, all files of a given dataset are downloaded.
Files are usually downloaded in parallel, using 10 workers by default. If you want to wait until
all the files are downloaded, set the wait_until_finished option to True.
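Usage sketch (assuming a DataSet object `ds`; the file path is illustrative):
    ds.download(files='original/data.csv', wait_until_finished=True)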
"""
if files is None:
files = self.file_list()
elif isinstance(files, str):
files = [files]
base_url = self.data['dataStore']['downloadUrl'] + '/datastore_server/' + self.permid + '/'
queue = DataSetDownloadQueue(workers=workers)
# get file list and start download
for filename in files:
file_info = self.get_file_list(start_folder=filename)
file_size = file_info[0]['fileSize']
download_url = base_url + filename + '?sessionID=' + self.openbis.token
filename = os.path.join(self.openbis.hostname, self.permid, filename)
queue.put([download_url, filename, file_size, self.openbis.verify_certificates])
# wait until all files have downloaded
if wait_until_finished:
queue.join()
print("Files downloaded to: %s" % os.path.join(self.openbis.hostname, self.permid))
def get_parents(self):
return self.openbis.get_datasets(withChildren=self.permid)
def get_children(self):
return self.openbis.get_datasets(withParents=self.permid)
def file_list(self):
files = []
for file in self.get_file_list(recursive=True):
if not file['isDirectory']:
files.append(file['pathInDataSet'])
return files
def get_files(self, start_folder='/'):
""" Returns a DataFrame of all files in this dataset
"""
def createRelativePath(pathInDataSet):
if self.shareId is None:
return ''
else:
return os.path.join(self.shareId, self.location, pathInDataSet)
files = self.get_file_list(start_folder=start_folder)
df = DataFrame(files)
df['relativePath'] = df['pathInDataSet'].map(createRelativePath)
df['crc32Checksum'] = df['crc32Checksum'].fillna(0.0).astype(int).map(signed_to_unsigned)
return df[['isDirectory', 'pathInDataSet', 'fileSize', 'crc32Checksum']]
def get_file_list(self, recursive=True, start_folder="/"):
""" Lists all files of a given dataset. You can specifiy a start_folder other than "/".
By default, all directories and their containing files are listed recursively. You can
turn off this option by setting recursive=False.
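Usage sketch (assuming a DataSet object `ds`):
    files = ds.get_file_list(recursive=True, start_folder='/')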
"""
request = {
"method" : "listFilesForDataSet",
"params" : [
self.openbis.token,
self.permid,
start_folder,
recursive,
],
"id":"1"
}
resp = requests.post(
self.data["dataStore"]["downloadUrl"] + '/datastore_server/rmi-dss-api-v1.json',
json.dumps(request),
verify=self.openbis.verify_certificates
)
data = resp.json()
if 'error' in data: