Skip to content
Snippets Groups Projects
pybis.py 89.2 KiB
Newer Older
  • Learn to ignore specific revisions
  •             "@type" : "as.dto.semanticannotation.search.SemanticAnnotationSearchCriteria",
                "criteria" : [{
                    "@type" : "as.dto.common.search.PermIdSearchCriteria",
                    "fieldValue" : {
                        "@type" : "as.dto.common.search.StringEqualToValue",
                        "value" : permId
                    }
                }]
            }
    
            objects = self._search_semantic_annotations(criteria)
            object = objects[0]
    
            if only_data:
                return object
            else:
                return SemanticAnnotation(self, isNew=False, **object)    
        
    
        def get_sample_types(self, type=None):
            """ Returns a list of all available sample types
            """
            return self._get_types_of(
                "searchSampleTypes",
                "Sample",
    
        get_object_types = get_sample_types # Alias
    
    
        def get_sample_type(self, type):
            try:
                return self._get_types_of(
    
                    "Sample",
                    type,
                    ["generatedCodePrefix"]
                )
            except Exception:
                raise ValueError("no such sample type: {}".format(type))
    
    
        get_object_type = get_sample_type # Alias
    
    
        def get_experiment_types(self, type=None):
            """ Returns a list of all available experiment types
            """
            return self._get_types_of(
    
                "searchExperimentTypes",
                "Experiment",
    
                    "searchExperimentTypes",
                    "Experiment",
    
                    type
                )
            except Exception:
                raise ValueError("No such experiment type: {}".format(type))
    
        def get_material_types(self, type=None):
            """ Returns a list of all available material types
            """
            return self._get_types_of("searchMaterialTypes", "Material", type)
    
        def get_material_type(self, type):
            try:
                return self._get_types_of("searchMaterialTypes", "Material", type)
            except Exception:
                raise ValueError("No such material type: {}".format(type))
    
        def get_dataset_types(self, type=None):
            """ Returns a list (DataFrame object) of all currently available dataset types
            """
    
            return self._get_types_of("searchDataSetTypes", "DataSet", type, optional_attributes=['kind'])
    
                return self._get_types_of("searchDataSetTypes", "DataSet", type, optional_attributes=['kind'])
    
            except Exception:
                raise ValueError("No such dataSet type: {}".format(type))
    
    
        def _get_types_of(self, method_name, entity, type_name=None, additional_attributes=[], optional_attributes=[]):
    
            """ Returns a list of all available types of an entity.
            If the name of the entity-type is given, it returns a PropertyAssignments object
    
            if type_name is not None:
                search_request = _gen_search_criteria({
    
                    entity.lower(): entity + "Type",
    
                    "code": type_name
    
                    "@type": "as.dto.{}.fetchoptions.{}TypeFetchOptions".format(
    
                        entity.lower(), entity
    
                fetch_options['propertyAssignments'] = fetch_option['propertyAssignments']
    
            request = {
                "method": method_name,
    
                "params": [self.token, search_request, fetch_options],
    
            }
            resp = self._post_request(self.as_v3, request)
    
            if type_name is not None and len(resp['objects']) == 1:
    
                return PropertyAssignments(self, resp['objects'][0])
    
            if len(resp['objects']) >= 1:
                types = DataFrame(resp['objects'])
    
                types['modificationDate'] = types['modificationDate'].map(format_timestamp)
    
                attributes = self._get_attributes(type_name, types, additional_attributes, optional_attributes)
    
                return Things(self, entity.lower() + '_type', types[attributes])
    
    
                raise ValueError("Nothing found!")
    
        def _get_attributes(self, type_name, types, additional_attributes, optional_attributes):
            attributes = ['code', 'description'] + additional_attributes
            attributes += [attribute for attribute in optional_attributes if attribute in types]
            attributes += ['modificationDate']
            if type_name is not None:
                attributes += ['propertyAssignments']
            return attributes
    
    
            """ checks whether a session is still active. Returns true or false.
            """
    
            This method is useful to check if a token is still valid or if it has timed out,
            requiring the user to login again.
    
            :return: Return True if the token is valid, False if it is not valid.
            """
    
            request = {
                "method": "isSessionActive",
    
            resp = self._post_request(self.as_v1, request)
    
            return resp
    
        def get_dataset(self, permid, only_data=False):
    
            """fetch a dataset and some metadata attached to it:
            - properties
            - sample
            - parents
            - children
            - containers
            - dataStore
            - physicalData
            - linkedData
            :return: a DataSet object
            """
    
            criteria = [{
                "permId": permid,
                "@type": "as.dto.dataset.id.DataSetPermId"
            }]
    
            fetchopts = {
    
                "parents": {"@type": "as.dto.dataset.fetchoptions.DataSetFetchOptions"},
                "children": {"@type": "as.dto.dataset.fetchoptions.DataSetFetchOptions"},
                "containers": {"@type": "as.dto.dataset.fetchoptions.DataSetFetchOptions"},
                "type": {"@type": "as.dto.dataset.fetchoptions.DataSetTypeFetchOptions"},
    
            for option in ['tags', 'properties', 'dataStore', 'physicalData', 'linkedData',
    
                           'experiment', 'sample']:
                fetchopts[option] = fetch_option[option]
    
            request = {
    
                "method": "getDataSets",
    
                "params": [
                    self.token,
                    criteria,
                    fetchopts,
                ],
    
            resp = self._post_request(self.as_v3, request)
    
            if resp is None or len(resp) == 0:
    
                raise ValueError('no such dataset found: ' + permid)
    
            for permid in resp:
                if only_data:
                    return resp[permid]
                else:
                    return DataSet(
                        self, 
                        type=self.get_dataset_type(resp[permid]["type"]["code"]),
                        data=resp[permid]
                    )
    
        def get_sample(self, sample_ident, only_data=False, withAttachments=False):
    
            """Retrieve metadata for the sample.
            Get metadata for the sample and any directly connected parents of the sample to allow access
            to the same information visible in the ELN UI. The metadata will be on the file system.
            :param sample_identifiers: A list of sample identifiers to retrieve.
            """
    
            search_request = search_request_for_identifier(sample_ident, 'sample')
    
            fetchopts = {"type": {"@type": "as.dto.sample.fetchoptions.SampleTypeFetchOptions"}}
    
            for option in ['tags', 'properties', 'attachments', 'space', 'experiment', 'registrator', 'dataSets']:
                fetchopts[option] = fetch_option[option]
    
            if withAttachments:
                fetchopts['attachments'] = fetch_option['attachmentsWithContent']
    
    
            for key in ['parents','children','container','components']:
                fetchopts[key] = {"@type": "as.dto.sample.fetchoptions.SampleFetchOptions"}
    
    
            sample_request = {
                "method": "getSamples",
                "params": [
                    self.token,
    
            resp = self._post_request(self.as_v3, sample_request)
    
    Swen Vermeul's avatar
    Swen Vermeul committed
            if resp is None or len(resp) == 0:
    
                raise ValueError('no such sample found: ' + sample_ident)
    
    Swen Vermeul's avatar
    Swen Vermeul committed
            else:
    
                for sample_ident in resp:
    
                    if only_data:
                        return resp[sample_ident]
                    else:
                        return Sample(self, self.get_sample_type(resp[sample_ident]["type"]["code"]), resp[sample_ident])
    
        def get_external_data_management_system(self, permId, only_data=False):
    
            """Retrieve metadata for the external data management system.
    
            :param permId: A permId for an external DMS.
    
            :param only_data: Return the result data as a hash-map, not an object.
            """
    
            request = {
                "method": "getExternalDataManagementSystems",
                "params": [
                    self.token,
                    [{
                        "@type": "as.dto.externaldms.id.ExternalDmsPermId",
    
                    }],
                    {},
                ],
            }
    
            resp = self._post_request(self.as_v3, request)
            parse_jackson(resp)
    
            if resp is None or len(resp) == 0:
    
                raise ValueError('no such external DMS found: ' + permId)
    
            else:
                for ident in resp:
                    if only_data:
                        return resp[ident]
                    else:
                        return ExternalDMS(self, resp[ident])
    
    
        def new_space(self, **kwargs):
    
    Swen Vermeul's avatar
    Swen Vermeul committed
            """ Creates a new space in the openBIS instance.
    
            return Space(self, None, **kwargs)
    
    
        def new_git_data_set(self, data_set_type, path, commit_id, repository_id, dms, sample=None, experiment=None, properties={},
    
                             dss_code=None, parents=None, data_set_code=None, contents=[]):
    
            """ Create a link data set.
            :param data_set_type: The type of the data set
    
            :param path: The path to the git repository
            :param commit_id: The git commit id
    
            :param repository_id: The git repository id - same for copies
    
            :param dms: An external data managment system object or external_dms_id
            :param sample: A sample object or sample id.
    
            :param dss_code: Code for the DSS -- defaults to the first dss if none is supplied.
    
            :param properties: Properties for the data set.
            :param parents: Parents for the data set.
    
            :param data_set_code: A data set code -- used if provided, otherwise generated on the server
    
            :param contents: A list of dicts that describe the contents:
                {'file_length': [file length],
                 'crc32': [crc32 checksum],
                 'directory': [is path a directory?]
                 'path': [the relative path string]}
    
            return pbds.GitDataSetCreation(self, data_set_type, path, commit_id, repository_id, dms, sample, experiment,
    
                                           properties, dss_code, parents, data_set_code, contents).new_git_data_set()
    
    def new_content_copy(self, path, commit_id, repository_id, edms_id, data_set_id):
        """
        Create a content copy in an existing link data set.
        :param path: path of the new content copy
        :param commit_id: commit id of the new content copy
        :param repository_id: repository id of the content copy
        :param edms_id: Id of the external data managment system of the content copy
        :param data_set_id: Id of the data set to which the new content copy belongs
        """
        return pbds.GitDataSetUpdate(self, path, commit_id, repository_id, edms_id, data_set_id).new_content_copy()
    
    
            """Take sample which may be a string or object and return an identifier for it."""
    
            return Openbis._object_to_object_id(sample, "as.dto.sample.id.SampleIdentifier", "as.dto.sample.id.SamplePermId");
    
        @staticmethod
        def experiment_to_experiment_id(experiment):
            """Take experiment which may be a string or object and return an identifier for it."""
            return Openbis._object_to_object_id(experiment, "as.dto.experiment.id.ExperimentIdentifier", "as.dto.experiment.id.SamplePermId");
    
        @staticmethod
        def _object_to_object_id(obj, identifierType, permIdType):
            object_id = None
            if isinstance(obj, str):
                if (is_identifier(obj)):
                    object_id = {
                        "identifier": obj,
                        "@type": identifierType
    
                    object_id = {
                        "permId": obj,
                        "@type": permIdType
    
                object_id = {
                    "identifier": obj.identifier,
                    "@type": identifierType
    
        @staticmethod
        def data_set_to_data_set_id(data_set):
            if isinstance(data_set, str):
                code = data_set
            else:
                code = data_set.permId
            return {
                "permId": code,
                "@type": "as.dto.dataset.id.DataSetPermId"
            }
    
    
        def external_data_managment_system_to_dms_id(self, dms):
            if isinstance(dms, str):
                dms_id = {
                    "permId": dms,
                    "@type": "as.dto.externaldms.id.ExternalDmsPermId"
                }
            else:
                dms_id = {
                    "identifier": dms.code,
                    "@type": "as.dto.sample.id.SampleIdentifier"
                }
            return dms_id
    
    
        def new_sample(self, type, props=None, **kwargs):
    
            """ Creates a new sample of a given sample type.
    
            return Sample(self, self.get_sample_type(type), None, props, **kwargs)
    
        def new_dataset(self, type=None, files=None, props=None, folder=None, **kwargs):
    
            """ Creates a new dataset of a given sample type.
            """
    
            if files is None:
                raise ValueError('please provide at least one file')
            elif isinstance(files, str):
                files = [files]
    
    
            type_obj = self.get_dataset_type(type.upper())
    
            return DataSet(self, type=type_obj, files=files, folder=folder, props=props, **kwargs)
    
        
        def new_semantic_annotation(self, entityType=None, propertyType=None, **kwargs):
    
            return SemanticAnnotation(
                openbis_obj=self, isNew=True, 
                entityType=entityType, propertyType=propertyType, **kwargs
            )    
    
        def _get_dss_url(self, dss_code=None):
            """ internal method to get the downloadURL of a datastore.
            """
    
            dss = self.get_datastores()
            if dss_code is None:
                return dss['downloadUrl'][0]
    
                return dss[dss['code'] == dss_code]['downloadUrl'][0]
    
    class LinkedData():
        def __init__(self, data=None):
            self.data = data if data is not None else []
            self.attrs = ['externalCode', 'contentCopies']
    
        def __dir__(self):
            return self.attrs
    
        def __getattr__(self, name):
            if name in self.attrs:
                if name in self.data:
                    return self.data[name]
            else:
                return ''
    
    
    
    class PhysicalData():
        def __init__(self, data=None):
            if data is None:
                data = []
            self.data = data
    
            self.attrs = ['speedHint', 'complete', 'shareId', 'size',
                          'fileFormatType', 'storageFormat', 'location', 'presentInArchive',
                          'storageConfirmation', 'locatorType', 'status']
    
    
        def __dir__(self):
            return self.attrs
    
        def __getattr__(self, name):
            if name in self.attrs:
                if name in self.data:
                    return self.data[name]
            else:
                return ''
    
        def _repr_html_(self):
            html = """
                <table border="1" class="dataframe">
                <thead>
                    <tr style="text-align: right;">
                    <th>attribute</th>
                    <th>value</th>
                    </tr>
                </thead>
                <tbody>
            """
    
            for attr in self.attrs:
                html += "<tr> <td>{}</td> <td>{}</td> </tr>".format(
    
                )
    
            html += """
                </tbody>
                </table>
            """
            return html
    
        def __repr__(self):
    
            headers = ['attribute', 'value']
            lines = []
            for attr in self.attrs:
                lines.append([
                    attr,
                    getattr(self, attr, '')
                ])
            return tabulate(lines, headers=headers)
    
    
    
    
    
    class ExternalDMS():
        """ managing openBIS external data management systems
    
        def __init__(self, openbis_obj, data=None, **kwargs):
            self.__dict__['openbis'] = openbis_obj
    
            if data is not None:
    
            if kwargs is not None:
                for key in kwargs:
                    setattr(self, key, kwargs[key])
    
        def __getattr__(self, name):
            return self.__dict__['data'].get(name)
    
    
        def __dir__(self):
    
            """all the available methods and attributes that should be displayed
            when using the autocompletion feature (TAB) in Jupyter
            """
            return ['code', 'label', 'urlTemplate', 'address', 'addressType', 'openbis']
    
        def __str__(self):
            return self.data.get('code', None)
    
    class Things():
        """An object that contains a DataFrame object about an entity  available in openBIS.
           
        """
    
        def __init__(self, openbis_obj, entity, df, identifier_name='code'):
            self.openbis = openbis_obj
            self.entity = entity
            self.df = df
            self.identifier_name = identifier_name
    
        def __repr__(self):
            return tabulate(self.df, headers=list(self.df))
    
        def _repr_html_(self):
            return self.df._repr_html_()
    
        def get_parents(self, **kwargs):
            if self.entity not in ['sample', 'dataset']:
                raise ValueError("{}s do not have parents".format(self.entity))
    
            if self.df is not None and len(self.df) > 0:
                dfs = []
                for ident in self.df[self.identifier_name]:
                    # get all objects that have this object as a child == parent
                    try:
                        parents = getattr(self.openbis, 'get_' + self.entity.lower() + 's')(withChildren=ident, **kwargs)
                        dfs.append(parents.df)
                    except ValueError:
                        pass
    
                if len(dfs) > 0:
                    return Things(self.openbis, self.entity, pd.concat(dfs), self.identifier_name)
                else:
                    return Things(self.openbis, self.entity, DataFrame(), self.identifier_name)
    
        def get_children(self, **kwargs):
            if self.entity not in ['sample', 'dataset']:
                raise ValueError("{}s do not have children".format(self.entity))
    
            if self.df is not None and len(self.df) > 0:
                dfs = []
                for ident in self.df[self.identifier_name]:
                    # get all objects that have this object as a child == parent
                    try:
                        parents = getattr(self.openbis, 'get_' + self.entity.lower() + 's')(withParent=ident, **kwargs)
                        dfs.append(parents.df)
                    except ValueError:
                        pass
    
                if len(dfs) > 0:
                    return Things(self.openbis, self.entity, pd.concat(dfs), self.identifier_name)
                else:
                    return Things(self.openbis, self.entity, DataFrame(), self.identifier_name)
    
    Swen Vermeul's avatar
    Swen Vermeul committed
        def get_samples(self, **kwargs):
    
            if self.entity not in ['space', 'project', 'experiment']:
                raise ValueError("{}s do not have samples".format(self.entity))
    
            if self.df is not None and len(self.df) > 0:
                dfs = []
                for ident in self.df[self.identifier_name]:
                    args = {}
                    args[self.entity.lower()] = ident
                    try:
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                        samples = self.openbis.get_samples(**args, **kwargs)
    
                        dfs.append(samples.df)
                    except ValueError:
                        pass
    
                if len(dfs) > 0:
                    return Things(self.openbis, 'sample', pd.concat(dfs), 'identifier')
                else:
                    return Things(self.openbis, 'sample', DataFrame(), 'identifier')
    
    
        get_objects = get_samples # Alias
    
    
    Swen Vermeul's avatar
    Swen Vermeul committed
        def get_datasets(self, **kwargs):
    
            if self.entity not in ['sample', 'experiment']:
    
                raise ValueError("{}s do not have datasets".format(self.entity))
    
            if self.df is not None and len(self.df) > 0:
                dfs = []
                for ident in self.df[self.identifier_name]:
                    args = {}
                    args[self.entity.lower()] = ident
                    try:
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                        datasets = self.openbis.get_datasets(**args, **kwargs)
    
                        dfs.append(datasets.df)
                    except ValueError:
                        pass
    
                if len(dfs) > 0:
                    return Things(self.openbis, 'dataset', pd.concat(dfs), 'permId')
                else:
                    return Things(self.openbis, 'dataset', DataFrame(), 'permId')
    
    
        def __getitem__(self, key):
            if self.df is not None and len(self.df) > 0:
                row = None
                if isinstance(key, int):
                    # get thing by rowid
                    row = self.df.loc[[key]]
    
                elif isinstance(key, list):
                    # treat it as a normal dataframe
                    return self.df[key]
    
                else:
                    # get thing by code
    
                    row = self.df[self.df[self.identifier_name] == key.upper()]
    
    
                if row is not None:
    
                    # invoke the openbis.get_<entity>() method
    
                    return getattr(self.openbis, 'get_' + self.entity)(row[self.identifier_name].values[0])
    
        def __iter__(self):
            for item in self.df[[self.identifier_name]][self.identifier_name].iteritems():
    
                yield getattr(self.openbis, 'get_' + self.entity)(item[1])
    
                # return self.df[[self.identifier_name]].to_dict()[self.identifier_name]