Newer
Older
"dataStore",
"physicalData",
"linkedData",
"experiment",
"sample",
"registrator",
"modifier",
]:
fetchopts[option] = fetch_option[option]
request = {
"params": [
self.token,
Swen Vermeul
committed
identifiers,
resp = self._post_request(self.as_v3, request)
Swen Vermeul
committed
if just_one:
if len(resp) == 0:
raise ValueError("no such dataset found: {}".format(permIds))
Swen Vermeul
committed
parse_jackson(resp)
for permId in resp:
if only_data:
return resp[permId]
else:
return DataSet(
type=self.get_dataset_type(resp[permId]["type"]["code"]),
data=resp[permId],
Swen Vermeul
committed
)
else:
return self._dataset_list_for_response(
response=list(resp.values()), props=props
)
def _dataset_list_for_response(
self,
response,
attrs=None,
props=None,
start_with=None,
count=None,
totalCount=0,
objects=None,
"""returns a Things object, containing a DataFrame plus some additional information"""
def extract_attribute(attribute_to_extract):
def return_attribute(obj):
return ""
return obj.get(attribute_to_extract, "")
Swen Vermeul
committed
parse_jackson(response)
Swen Vermeul
committed
default_attrs = [
"permId",
"type",
"experiment",
"sample",
"registrationDate",
"modificationDate",
"location",
"status",
"presentInArchive",
"size",
Swen Vermeul
committed
]
display_attrs = default_attrs + attrs
Swen Vermeul
committed
def extract_project(attr):
Swen Vermeul
committed
def extract_attr(obj):
Swen Vermeul
committed
if attr:
Swen Vermeul
committed
else:
return obj["project"]["identifier"]["identifier"]
Swen Vermeul
committed
except KeyError:
Swen Vermeul
committed
return extract_attr
def extract_space(attr):
Swen Vermeul
committed
def extract_attr(obj):
Swen Vermeul
committed
if attr:
Swen Vermeul
committed
else:
Swen Vermeul
committed
except KeyError:
Swen Vermeul
committed
return extract_attr
if props is None:
props = []
else:
if isinstance(props, str):
props = [props]
Swen Vermeul
committed
if len(response) == 0:
datasets = DataFrame(columns=display_attrs)
Swen Vermeul
committed
else:
datasets = DataFrame(response)
if "project" in attr:
datasets[attr] = datasets["experiment"].map(extract_project(attr))
elif "space" in attr:
datasets[attr] = datasets["experiment"].map(extract_space(attr))
elif "." in attr:
entity, attribute_to_extract = attr.split(".")
for attr in attrs:
# if no dot supplied, just display the code of the space, project or experiment
if any(entity == attr for entity in ["experiment", "sample"]):
datasets[attr] = datasets[attr].map(extract_nested_identifier)
datasets["registrationDate"] = datasets["registrationDate"].map(
format_timestamp
)
datasets["modificationDate"] = datasets["modificationDate"].map(
format_timestamp
)
datasets["experiment"] = datasets["experiment"].map(
extract_nested_identifier
)
datasets["sample"] = datasets["sample"].map(extract_nested_identifier)
datasets["type"] = datasets["type"].map(extract_code)
datasets["permId"] = datasets["code"]
for column in ["parents", "children", "components", "containers"]:
datasets[column] = datasets[column].map(extract_identifiers)
datasets["size"] = datasets["physicalData"].map(
lambda x: x.get("size") if x else ""
)
datasets["status"] = datasets["physicalData"].map(
lambda x: x.get("status") if x else ""
)
datasets["presentInArchive"] = datasets["physicalData"].map(
lambda x: x.get("presentInArchive") if x else ""
)
datasets["location"] = datasets["physicalData"].map(
lambda x: x.get("location") if x else ""
)
Swen Vermeul
committed
for prop in props:
# include all properties in dataFrame.
# expand the dataFrame by adding new columns
columns = []
for i, dataSet in enumerate(response):
for prop_name, val in dataSet.get("properties", {}).items():
datasets.loc[i, prop_name.upper()] = val
columns.append(prop_name.upper())
display_attrs += set(columns)
# property name is provided
for i, dataSet in enumerate(response):
val = dataSet.get("properties", {}).get(
prop, ""
) or dataSet.get("properties", {}).get(prop.upper(), "")
datasets.loc[i, prop.upper()] = val
display_attrs.append(prop.upper())
start_with=start_with,
count=count,
totalCount=totalCount,
Swen Vermeul
committed
objects=objects,
Swen Vermeul
committed
self, sample_ident, only_data=False, withAttachments=False, props=None, **kvals
Chandrasekhar Ramakrishnan
committed
"""Retrieve metadata for the sample.
Get metadata for the sample and any directly connected parents of the sample to allow access
to the same information visible in the ELN UI. The metadata will be on the file system.
:param sample_identifiers: A list of sample identifiers to retrieve.
"""
Swen Vermeul
committed
identifiers = []
if isinstance(sample_ident, list):
Swen Vermeul
committed
for ident in sample_ident:
identifiers.append(_type_for_id(ident, "sample"))
Swen Vermeul
committed
else:
identifiers.append(_type_for_id(sample_ident, "sample"))
fetchopts = fetch_option["sample"]
options = [
"tags",
"properties",
"attachments",
"space",
"experiment",
"registrator",
"modifier",
"dataSets",
]
for option in options:
Swen Vermeul
committed
fetchopts[option] = fetch_option[option]
if withAttachments:
fetchopts["attachments"] = fetch_option["attachmentsWithContent"]
Swen Vermeul
committed
for key in ["parents", "children", "container", "components"]:
fetchopts[key] = {"@type": "as.dto.sample.fetchoptions.SampleFetchOptions"}
resp = self._post_request(self.as_v3, request)
Swen Vermeul
committed
if len(resp) == 0:
raise ValueError("no such sample found: {}".format(sample_ident))
Swen Vermeul
committed
parse_jackson(resp)
for sample_ident in resp:
if only_data:
return resp[sample_ident]
else:
type=self.get_sample_type(resp[sample_ident]["type"]["code"]),
data=resp[sample_ident],
Swen Vermeul
committed
else:
return self._sample_list_for_response(
response=list(resp.values()),
props=props,
)
Swen Vermeul
committed
@staticmethod
def decode_attribute(entity, attribute):
    """Split an attribute expression into its components.

    The expression has the form ``name`` or ``name.attribute``, optionally
    followed by ``AS alias`` (the ``AS`` keyword is case-insensitive).

    :param entity: entity the expression was requested for; it is not used
        by the code in this function.
    :param attribute: the expression to decode, e.g. ``"object.code AS c"``.
    :return: dict with the keys ``requested_entity``, ``attribute``
        (``"code"`` when no ``.attribute`` part was given) and ``alias``
        (the expression without its ``AS`` clause when no alias was given).
        The key ``entity`` is added for the requested entities ``object``
        (mapped to ``sample``), ``collection`` (mapped to ``experiment``),
        ``space`` and ``project``.
    :raises ValueError: if the expression does not have the expected form.
    """
    attribute, *alias = re.split(r"\s+AS\s+", attribute, flags=re.IGNORECASE)
    alias = alias[0] if alias else attribute

    # the pattern carries inline comments, so it must be compiled in verbose mode
    regex = re.compile(
        r"""^                         # beginning of the string
            (?P<requested_entity>\w+) # the entity itself
            (\.(?P<attribute>\w+))?   # capture an optional .attribute
            $                         # end of string
        """,
        re.X,
    )
    match = regex.search(attribute)
    if match is None:
        # callers catch ValueError, not the AttributeError that
        # ``None.groupdict()`` would produce
        raise ValueError(f"unable to parse attribute: {attribute}")
    params = match.groupdict()

    # NOTE(review): any other requested entity leaves "entity" unset -- confirm
    # that this is intended.
    entity_for_request = {
        "object": "sample",
        "collection": "experiment",
        "space": "space",
        "project": "project",
    }
    requested = params["requested_entity"]
    if requested in entity_for_request:
        params["entity"] = entity_for_request[requested]

    if not params["attribute"]:
        params["attribute"] = "code"
    params["alias"] = alias
    return params
def _decode_property(self, entity, property):
# match something like: property_name.term.label AS label_alias
regex = re.compile(
r"""^
(?P<alias_alternative>
(?P<property>[^\.]* )
(?:
\.
(?P<subentity>term|pa) \.
(?P<field>code|vocabularyCode|label|description|ordinal|dataType)
)?
)
(
\s+(?i)AS\s+
(?P<alias>\w+)
)?
\s*
$
)
match = re.search(regex, property)
if not match:
try:
params = self.decode_attribute(entity, property)
return params
except ValueError:
raise ValueError(f"unable to parse property: {property}")
params = match.groupdict()
if not params["alias"]:
params["alias"] = params["alias_alternative"]
return params
def _sample_list_for_response(
self,
response,
attrs=None,
props=None,
start_with=None,
count=None,
totalCount=0,
objects=None,
"""returns a Things object, containing a DataFrame plus additional information"""
def extract_attribute(attribute_to_extract):
def return_attribute(obj):
return ""
return obj.get(attribute_to_extract, "")
return return_attribute
Swen Vermeul
committed
parse_jackson(response)
default_attrs = [
"identifier",
"permId",
"type",
"registrator",
"registrationDate",
"modifier",
"modificationDate",
]
display_attrs = default_attrs + attrs
if props is None:
props = []
else:
if isinstance(props, str):
props = [props]
Swen Vermeul
committed
if len(response) == 0:
samples = DataFrame(columns=display_attrs)
Swen Vermeul
committed
else:
samples = DataFrame(response)
for attr in attrs:
if "." in attr:
entity, attribute_to_extract = attr.split(".")
for attr in attrs:
# if no dot supplied, just display the code of the space, project or experiment
if attr in ["project", "experiment"]:
samples[attr] = samples[attr].map(extract_nested_identifier)
if attr in ["space"]:
Swen Vermeul
committed
samples[attr] = samples[attr].map(extract_code)
samples["registrationDate"] = samples["registrationDate"].map(
format_timestamp
)
samples["modificationDate"] = samples["modificationDate"].map(
format_timestamp
)
samples["registrator"] = samples["registrator"].map(extract_person)
samples["modifier"] = samples["modifier"].map(extract_person)
samples["identifier"] = samples["identifier"].map(extract_identifier)
samples["container"] = samples["container"].map(extract_nested_identifier)
for column in ["parents", "children", "components"]:
if column in samples:
samples[column] = samples[column].map(extract_identifiers)
samples["permId"] = samples["permId"].map(extract_permid)
samples["type"] = samples["type"].map(extract_nested_permid)
Swen Vermeul
committed
for prop in props:
# include all properties in dataFrame.
# expand the dataFrame by adding new columns
columns = []
for i, sample in enumerate(response):
for prop_name, val in sample.get("properties", {}).items():
samples.loc[i, prop_name.upper()] = val
columns.append(prop_name.upper())
display_attrs += set(columns)
# property name is provided
for i, sample in enumerate(response):
val = sample.get("properties", {}).get(prop, "") or sample.get(
"properties", {}
).get(prop.upper(), "")
samples.loc[i, prop.upper()] = val
display_attrs.append(prop.upper())
start_with=start_with,
count=count,
totalCount=totalCount,
Swen Vermeul
committed
Swen Vermeul
committed
def get_external_data_management_systems(
self, start_with=None, count=None, only_data=False
):
entity = "externalDms"
fetchopts = get_fetchoption_for_entity(entity)
request = {
"method": "searchExternalDataManagementSystems",
"params": [
self.token,
criteria,
fetchopts,
],
}
response = self._post_request(self.as_v3, request)
parse_jackson(response)
attrs = "code label address addressType urlTemplate openbis".split()
entities = DataFrame(columns=attrs)
parse_jackson(objects)
entities = DataFrame(objects)
entities["permId"] = entities["permId"].map(extract_permid)
start_with=start_with,
count=count,
totalCount=totalCount,
)
def get_external_data_management_system(self, permId, only_data=False):
    """Retrieve metadata for the external data management system.

    :param permId: A permId for an external DMS.
    :param only_data: Return the result data as a hash-map, not an object.
    :return: the raw response entry when ``only_data`` is true, otherwise an
        ``ExternalDMS`` built from it.
    :raises ValueError: if the server returns nothing for ``permId``.
    """
    request = {
        "method": "getExternalDataManagementSystems",
        "params": [
            self.token,
            [
                {
                    "@type": "as.dto.externaldms.id.ExternalDmsPermId",
                    "permId": permId,
                }
            ],
            {
                "@type": "as.dto.externaldms.fetchoptions.ExternalDmsFetchOptions",
            },
        ],
    }

    resp = self._post_request(self.as_v3, request)
    parse_jackson(resp)

    if not resp:
        # formatted rather than concatenated, so a non-str permId still
        # produces the intended ValueError instead of a TypeError
        raise ValueError(f"no such external DMS found: {permId}")

    # only one permId was requested, so the first entry is the answer
    data = next(iter(resp.values()))
    if only_data:
        return data
    return ExternalDMS(self, data)
get_externalDms = get_external_data_management_system # alias
def new_space(self, **kwargs):
    """Build a new, not yet existing Space for this openBIS instance.

    All keyword arguments are forwarded to ``Space``; ``None`` is passed as
    its second positional argument.
    """
    space = Space(self, None, **kwargs)
    return space
def new_git_data_set(
    self,
    data_set_type,
    path,
    commit_id,
    repository_id,
    dms,
    sample=None,
    experiment=None,
    properties=None,
    dss_code=None,
    parents=None,
    data_set_code=None,
    contents=None,
):
    """Create a link data set.

    :param data_set_type: The type of the data set
    :param path: The path to the git repository
    :param commit_id: The git commit id
    :param repository_id: The git repository id - same for copies
    :param dms: An external data managment system object or external_dms_id
    :param sample: A sample object or sample id.
    :param experiment: Passed through unchanged; presumably an experiment
        object or experiment id, by analogy with ``sample``.
    :param dss_code: Code for the DSS -- defaults to the first dss if none is supplied.
    :param properties: Properties for the data set (defaults to an empty dict).
    :param parents: Parents for the data set.
    :param data_set_code: A data set code -- used if provided, otherwise generated on the server
    :param contents: A list of dicts that describe the contents
        (defaults to an empty list):
        {'file_length': [file length],
         'crc32': [crc32 checksum],
         'directory': [is path a directory?]
         'path': [the relative path string]}
    :return: A DataSet object
    """
    # Fresh containers per call: a literal {} / [] default would be one
    # shared object that any mutation downstream leaks into later calls.
    if properties is None:
        properties = {}
    if contents is None:
        contents = []

    return pbds.GitDataSetCreation(
        self,
        data_set_type,
        path,
        commit_id,
        repository_id,
        dms,
        sample,
        experiment,
        properties,
        dss_code,
        parents,
        data_set_code,
        contents,
    ).new_git_data_set()
def new_content_copy(self, path, commit_id, repository_id, edms_id, data_set_id):
    """Create a content copy in an existing link data set.

    :param path: path of the new content copy
    :param commit_id: commit id of the new content copy
    :param repository_id: repository id of the content copy
    :param edms_id: Id of the external data managment system of the content copy
    :param data_set_id: Id of the data set to which the new content copy belongs
    :return: whatever ``GitDataSetUpdate.new_content_copy`` returns
    """
    updater = pbds.GitDataSetUpdate(self, data_set_id)
    return updater.new_content_copy(path, commit_id, repository_id, edms_id)
def search_files(self, data_set_id, dss_code=None):
    """Search the files of a data set via ``pbds.GitDataSetFileSearch``.

    :param data_set_id: Id of the data set whose files are searched
    :param dss_code: accepted but not used by this method
    :return: whatever ``GitDataSetFileSearch.search_files`` returns
    """
    file_search = pbds.GitDataSetFileSearch(self, data_set_id)
    return file_search.search_files()
def delete_content_copy(self, data_set_id, content_copy):
    """Delete a content copy from a data set.

    :param data_set_id: Id of the data set containing the content copy
    :param content_copy: The content copy to be deleted
    :return: whatever ``GitDataSetUpdate.delete_content_copy`` returns
    """
    updater = pbds.GitDataSetUpdate(self, data_set_id)
    return updater.delete_content_copy(content_copy)
Chandrasekhar Ramakrishnan
committed
@staticmethod
def sample_to_sample_id(sample):
    """Return an id dict for a sample given as a string or as an object.

    Delegates to ``Openbis._object_to_object_id`` with the sample identifier
    and sample permId type names.
    """
    identifier_type = "as.dto.sample.id.SampleIdentifier"
    perm_id_type = "as.dto.sample.id.SamplePermId"
    return Openbis._object_to_object_id(sample, identifier_type, perm_id_type)
@staticmethod
def experiment_to_experiment_id(experiment):
    """Take experiment which may be a string or object and return an identifier for it.

    Delegates to ``Openbis._object_to_object_id`` with the experiment
    identifier and experiment permId type names.
    """
    return Openbis._object_to_object_id(
        experiment,
        "as.dto.experiment.id.ExperimentIdentifier",
        # the experiment package has no SamplePermId class; its permId
        # type is ExperimentPermId
        "as.dto.experiment.id.ExperimentPermId",
    )
@staticmethod
def _object_to_object_id(obj, identifierType, permIdType):
object_id = None
if isinstance(obj, str):
if is_identifier(obj):
object_id = {"identifier": obj, "@type": identifierType}
object_id = {"identifier": obj.identifier, "@type": identifierType}
return object_id
Chandrasekhar Ramakrishnan
committed
@staticmethod
def data_set_to_data_set_id(data_set):
    """Return a DataSetPermId dict for a data set.

    :param data_set: a permId string, or an object exposing ``.permId``.
    :return: ``{"permId": ..., "@type": "as.dto.dataset.id.DataSetPermId"}``
    """
    perm_id = data_set if isinstance(data_set, str) else data_set.permId
    return {"permId": perm_id, "@type": "as.dto.dataset.id.DataSetPermId"}
Chandrasekhar Ramakrishnan
committed
def external_data_managment_system_to_dms_id(self, dms):
    """Return an id dict for an external data management system.

    :param dms: a permId string, or an object exposing ``.code``.
    :return: an ExternalDmsPermId dict for a string; for an object, a dict
        holding its code under ``"identifier"``.
    """
    if not isinstance(dms, str):
        # NOTE(review): this branch emits no "@type" entry -- confirm that
        # the server accepts it.
        return {
            "identifier": dms.code,
        }
    return {"permId": dms, "@type": "as.dto.externaldms.id.ExternalDmsPermId"}
def new_sample(self, type, project=None, props=None, **kwargs):
    """Create a new sample object of the given sample type.

    type         -- sampleType code or object: mandatory
    code         -- name/code for the sample, if not generated automatically
    space        -- space code or object
    project      -- project code or object
    experiment   -- experiment code or object
    collection   -- same as above
    props        -- a dictionary containing the properties
    """
    # "collection" is an alias for "experiment"
    if "collection" in kwargs:
        kwargs["experiment"] = kwargs.pop("collection")

    # a type given by its code is looked up first
    sample_type = self.get_sample_type(type) if isinstance(type, str) else type

    return Sample(
        self, type=sample_type, project=project, data=None, props=props, **kwargs
    )
Swen Vermeul
committed
def new_transaction(self, *entities):
    """Return a ``Transaction`` built from the given entities."""
    transaction = Transaction(*entities)
    return transaction
4672
4673
4674
4675
4676
4677
4678
4679
4680
4681
4682
4683
4684
4685
4686
4687
4688
4689
4690
4691
4692
4693
4694
4695
4696
4697
def new_sample_type(
    self,
    code,
    generatedCodePrefix,
    subcodeUnique=False,
    autoGeneratedCode=False,
    listable=True,
    showContainer=False,
    showParents=True,
    showParentMetadata=False,
    validationPlugin=None,
):
    """Creates a new sample type.

    Every argument is forwarded as a keyword to ``SampleType``;
    ``self.get_sample_type`` is passed along as its ``method``.
    """
    return SampleType(
        self,
        code=code,
        generatedCodePrefix=generatedCodePrefix,
        # previously accepted by this method but never handed on
        subcodeUnique=subcodeUnique,
        autoGeneratedCode=autoGeneratedCode,
        listable=listable,
        showContainer=showContainer,
        showParents=showParents,
        showParentMetadata=showParentMetadata,
        validationPlugin=validationPlugin,
        method=self.get_sample_type,
    )
new_object_type = new_sample_type
4701
4702
4703
4704
4705
4706
4707
4708
4709
4710
4711
4712
4713
4714
4715
4716
4717
4718
4719
4720
4721
4722
4723
4724
4725
4726
4727
4728
4729
4730
4731
4732
4733
4734
4735
4736
def new_dataset_type(
    self,
    code,
    description=None,
    mainDataSetPattern=None,
    mainDataSetPath=None,
    disallowDeletion=False,
    validationPlugin=None,
):
    """Build a new dataSet type.

    The arguments are forwarded as keywords to ``DataSetType``, together
    with ``self.get_dataset_type`` as its ``method``.
    """
    # keyword order is the same as in a direct call
    attributes = {
        "code": code,
        "description": description,
        "mainDataSetPattern": mainDataSetPattern,
        "mainDataSetPath": mainDataSetPath,
        "disallowDeletion": disallowDeletion,
        "validationPlugin": validationPlugin,
        "method": self.get_dataset_type,
    }
    return DataSetType(self, **attributes)
def new_experiment_type(
    self,
    code,
    description=None,
    validationPlugin=None,
):
    """Build a new experiment type (collection type).

    The arguments are forwarded as keywords to ``ExperimentType``, together
    with ``self.get_experiment_type`` as its ``method``.
    """
    # keyword order is the same as in a direct call
    attributes = {
        "code": code,
        "description": description,
        "validationPlugin": validationPlugin,
        "method": self.get_experiment_type,
    }
    return ExperimentType(self, **attributes)
new_collection_type = new_experiment_type
def new_material_type(
    self,
    code,
    description=None,
    validationPlugin=None,
):
    """Build a new material type.

    The arguments are forwarded as keywords to ``MaterialType``, together
    with ``self.get_material_type`` as its ``method``.
    """
    # keyword order is the same as in a direct call
    attributes = {
        "code": code,
        "description": description,
        "validationPlugin": validationPlugin,
        "method": self.get_material_type,
    }
    return MaterialType(self, **attributes)
def new_dataset(
    self,
    type=None,
    kind="PHYSICAL_DATA",
    files=None,
    file=None,
    props=None,
    folder=None,
    **kwargs,
):
    """Creates a new dataset of a given type.

    type         -- dataSetType code or object: mandatory
    sample       -- sample code or object
    object       -- same as above
    experiment   -- experiment code or object
    collection   -- same as above
    file         -- path to a single file or a directory
    files        -- list of paths to files. Instead of a file, a directory (or many directories)
                    can be provided, the structure is kept intact in openBIS
    zipfile      -- path to a zipfile, which is unzipped in openBIS
    kind         -- if set to CONTAINER, no files should be provided.
                    Instead, the dataset acts as a container for other datasets.
    props        -- a dictionary containing the properties
    folder       -- forwarded unchanged to DataSet

    Raises ValueError if no type is given.
    """
    if type is None:
        raise ValueError("Please provide a dataSet type")

    # a single file takes precedence over (and replaces) the files list
    if file:
        files = [file]

    # a type given by its code is looked up in upper case
    type_obj = self.get_dataset_type(type.upper()) if isinstance(type, str) else type

    # "object" and "collection" are aliases for "sample" and "experiment"
    for alias, canonical in (("object", "sample"), ("collection", "experiment")):
        if alias in kwargs:
            kwargs[canonical] = kwargs.pop(alias)

    return DataSet(
        self,
        type=type_obj,
        kind=kind,
        files=files,
        folder=folder,
        props=props,
        **kwargs,
    )
def new_semantic_annotation(self, entityType=None, propertyType=None, **kwargs):
    """Build a new ``SemanticAnnotation`` flagged as new (``isNew=True``).

    ``entityType``, ``propertyType`` and any further keyword arguments are
    forwarded to ``SemanticAnnotation``.
    """
    return SemanticAnnotation(
        openbis_obj=self,
        isNew=True,
        entityType=entityType,
        propertyType=propertyType,
        **kwargs,
    )
def new_vocabulary(
    self, code, terms, managedInternally=False, chosenFromList=True, **kwargs
):
    """Build a new vocabulary.

    ``code``, ``managedInternally`` and ``chosenFromList`` are merged into
    the keyword arguments handed to ``Vocabulary``.

    Usage::

        new_vocabulary(
            code = 'vocabulary_code',
            description = '',
            terms = [
                { "code": "term1", "label": "label1", "description": "description1" },
                { "code": "term2", "label": "label2", "description": "description2" },
            ]
        )
    """
    kwargs.update(
        code=code,
        managedInternally=managedInternally,
        chosenFromList=chosenFromList,
    )
    return Vocabulary(self, data=None, terms=terms, **kwargs)
"""internal method to get the downloadURL of a datastore."""
Swen Vermeul
committed
dss = self.get_datastores()
if dss_code is None:
else:
return dss[dss["code"] == dss_code]["downloadUrl"][0]
class ExternalDMS:
"""managing openBIS external data management systems"""
Swen Vermeul
committed
def __init__(self, openbis_obj, data=None, **kwargs):
Swen Vermeul
committed
Swen Vermeul
committed
if kwargs is not None:
for key in kwargs:
setattr(self, key, kwargs[key])
Swen Vermeul
committed
def __getattr__(self, name):
Swen Vermeul
committed
"""all the available methods and attributes that should be displayed
when using the autocompletion feature (TAB) in Jupyter
"""
return ["code", "label", "urlTemplate", "address", "addressType", "openbis"]
Swen Vermeul
committed
def __str__(self):
def __init__(self, info):
self._info = info
self.attrs = [
"api_version",
"archiving_configured",
"authentication_service",
"enabled_technologies",
"project_samples_enabled",
def __dir__(self):
return self.attrs
def __getattr__(self, name):
Fuentes Serna Juan Mariano (ID SIS)
committed
def get_major_version(self):
    """Return the first dot-separated part of the server's "api-version" as an int."""
    version_parts = self._info["api-version"].split(".")
    return int(version_parts[0])
Fuentes Serna Juan Mariano (ID SIS)
committed
def get_minor_version(self):
    """Return the second dot-separated part of the server's "api-version" as an int."""
    version_parts = self._info["api-version"].split(".")
    return int(version_parts[1])
Fuentes Serna Juan Mariano (ID SIS)
committed
def is_openbis_1605(self):
    """Return True when the API major version is 3 and the minor version is at most 2."""
    if self.get_major_version() != 3:
        return False
    return self.get_minor_version() <= 2
Fuentes Serna Juan Mariano (ID SIS)
committed
def is_openbis_1806(self):
    """Return True when the API major version is 3 and the minor version is at least 5."""
    if self.get_major_version() != 3:
        return False
    return self.get_minor_version() >= 5
def _repr_html_(self):
html = """
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th>attribute</th>
<th>value</th>
</tr>
</thead>
<tbody>
"""
for attr in self.attrs:
html += "<tr> <td>{}</td> <td>{}</td> </tr>".format(
)
html += """
</tbody>
</table>
"""
return html
Swen Vermeul
committed
class PropertyType(
    OpenBisObject,
    entity="propertyType",
    single_item_method_name="get_property_type",
):
    """Property type object; the class body adds nothing to ``OpenBisObject``."""
class Plugin(OpenBisObject, entity="plugin", single_item_method_name="get_plugin"):