Skip to content
Snippets Groups Projects
things.py 8.91 KiB
Newer Older
  • Learn to ignore specific revisions
  • #   Copyright ETH 2018 - 2023 Zürich, Scientific IT Services
    # 
    #   Licensed under the Apache License, Version 2.0 (the "License");
    #   you may not use this file except in compliance with the License.
    #   You may obtain a copy of the License at
    # 
    #        http://www.apache.org/licenses/LICENSE-2.0
    #   
    #   Unless required by applicable law or agreed to in writing, software
    #   distributed under the License is distributed on an "AS IS" BASIS,
    #   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    #   See the License for the specific language governing permissions and
    #   limitations under the License.
    #
    
    from tabulate import tabulate
    
    from pandas import DataFrame, Series
    import pandas as pd
    
    
    class Things:
    
        """An object that contains a DataFrame object about an entity  available in openBIS.
    
    Swen Vermeul's avatar
    Swen Vermeul committed
        Use .df to work with the DataFrame instead.
        Can be used in a for-loop:
    
        for sample in openbis.get_samples():
            ...
    
        You can access an element directly by providing the index number:
            openbis.get_samples()[7]
    
        Because the order of the elements cannot be ensured, you should choose the identifier instead:
            openbis.get_samples()['/SOME_SPACE/SAMPLE_CODE']
    
    
        Of course, if you know the identifier already, you would rather do:
    
    Swen Vermeul's avatar
    Swen Vermeul committed
            openbis.get_sample('/SOME_SPACE/SAMPLE_CODE')
    
            self,
            openbis_obj,
            entity,
            identifier_name="code",
            additional_identifier=None,
            start_with=None,
            count=None,
            totalCount=None,
    
    Swen Vermeul's avatar
    Swen Vermeul committed
            single_item_method=None,
    
            response=None,
            df_initializer=None,
            objects_initializer=None,
            attrs=None,
    
            props=None,
    
            self.openbis = openbis_obj
            self.entity = entity
    
            self.identifier_name = identifier_name
    
            self.additional_identifier = additional_identifier
    
    Swen Vermeul's avatar
    Swen Vermeul committed
            self.start_with = start_with
            self.count = count
    
            self.totalCount = totalCount
            self.single_item_method = single_item_method
    
            self.__objects_initializer = objects_initializer
            self.__df_initializer = df_initializer
            self.__attrs = attrs
            self.__props = props
    
    
    vkovtun's avatar
    vkovtun committed
        def is_df_initialised(self):
            return self.__df is not None
    
        def is_objects_initialised(self):
            return self.__objects is not None
    
    
        @property
        def df(self):
            if self.__df is None and self.__df_initializer is not None:
    
                self.__df = self.__df_initializer(
                    attrs=self.__attrs, props=self.__props, response=self.response
                )
    
            return self.__df
    
        @property
        def objects(self):
            if self.__objects is None and self.__objects_initializer is not None:
                self.__objects = self.__objects_initializer(response=self.response)
            return self.__objects
    
        def __repr__(self, headers=None, sort_by=None):
            if headers is None:
                headers = list(self.df)
            if sort_by:
                return tabulate(self.df.sort_values(by=sort_by), headers=headers)
            return tabulate(self.df, headers=headers)
    
    
        def __len__(self):
            return len(self.df)
    
        def _repr_html_(self):
            return self.df._repr_html_()
    
    
        @staticmethod
        def __create_data_frame(attrs, props, response):
            if len(response) > 0:
                return pd.concat(response)
            else:
                return DataFrame()
    
    
        def get_parents(self, **kwargs):
    
            if self.entity not in ["sample", "dataset"]:
    
                raise ValueError(f"{self.entity}s do not have parents")
    
    
            if self.df is not None and len(self.df) > 0:
                dfs = []
                for ident in self.df[self.identifier_name]:
                    # get all objects that have this object as a child == parent
                    try:
    
                        parents = getattr(self.openbis, "get_" + self.entity.lower() + "s")(
                            withChildren=ident, **kwargs
                        )
    
                        dfs.append(parents.df)
                    except ValueError:
                        pass
    
                return Things(
                    self.openbis,
                    self.entity,
                    self.identifier_name,
                    response=dfs,
                    df_initializer=self.__create_data_frame,
                )
    
    
        def get_children(self, **kwargs):
    
            if self.entity not in ["sample", "dataset"]:
    
                raise ValueError(f"{self.entity}s do not have children")
    
    
            if self.df is not None and len(self.df) > 0:
                dfs = []
                for ident in self.df[self.identifier_name]:
                    # get all objects that have this object as a child == parent
                    try:
    
                        parents = getattr(self.openbis, "get_" + self.entity.lower() + "s")(
                            withParent=ident, **kwargs
                        )
    
                        dfs.append(parents.df)
                    except ValueError:
                        pass
    
    
                    return Things(
                        self.openbis,
                        self.entity,
                        self.identifier_name,
                        response=dfs,
                        df_initializer=self.__create_data_frame,
                    )
    
    
        def get_samples(self, **kwargs):
    
            if self.entity not in ["space", "project", "experiment"]:
    
                raise ValueError(f"{self.entity}s do not have samples")
    
    
            if self.df is not None and len(self.df) > 0:
                dfs = []
                for ident in self.df[self.identifier_name]:
                    args = {}
                    args[self.entity.lower()] = ident
                    try:
                        samples = self.openbis.get_samples(**args, **kwargs)
                        dfs.append(samples.df)
                    except ValueError:
                        pass
    
    
                return Things(
                    self.openbis,
                    "sample",
                    "identifier",
                    response=dfs,
                    df_initializer=self.__create_data_frame,
                )
    
        get_objects = get_samples  # Alias
    
    
        def get_datasets(self, **kwargs):
    
            if self.entity not in ["sample", "experiment"]:
    
                raise ValueError(f"{self.entity}s do not have datasets")
    
    
            if self.df is not None and len(self.df) > 0:
                dfs = []
                for ident in self.df[self.identifier_name]:
                    args = {}
                    args[self.entity.lower()] = ident
                    try:
                        datasets = self.openbis.get_datasets(**args, **kwargs)
                        dfs.append(datasets.df)
                    except ValueError:
                        pass
    
    
                return Things(
                    self.openbis,
                    "dataset",
                    "permId",
                    response=dfs,
                    df_initializer=self.__create_data_frame,
                )
    
            """elegant way to fetch a certain element from the displayed list.
    
            If an integer value is given, we choose the row.
            If the key is a list, we return the desired columns (normal dataframe behaviour)
            If the key is a non-integer value, we treat it as a primary-key lookup
            """
    
            if self.df is not None and len(self.df) > 0:
                row = None
                if isinstance(key, int):
    
                    if self.objects:
                        return self.objects[key]
                    else:
                        # get thing by rowid
                        row = self.df.loc[[key]]
    
                elif isinstance(key, list):
                    # treat it as a normal dataframe
                    return self.df[key]
                else:
                    # get thing by code
                    row = self.df[self.df[self.identifier_name] == key.upper()]
    
                if row is not None:
                    # invoke the openbis.get_<entity>() method
    
                    if self.single_item_method:
                        get_item = self.single_item_method
                    else:
    
                        get_item = getattr(self.openbis, "get_" + self.entity)
    
                    if self.additional_identifier is None:
    
                        return get_item(row[self.identifier_name].values[0])
    
                            row[self.identifier_name].values[0],
                            row[self.additional_identifier].values[0],
    
    Swen Vermeul's avatar
    Swen Vermeul committed
            if self.objects:
                for obj in self.objects:
                    yield obj
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                if self.single_item_method:
                    get_item = self.single_item_method
                else:
    
                    get_item = getattr(self.openbis, "get_" + self.entity)
                for item in self.df[[self.identifier_name]][
                    self.identifier_name
                ].iteritems():
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                    yield get_item(item[1])
    
                    # yield getattr(self.openbis, 'get_' + self.entity)(item[1])
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                    # return self.df[[self.identifier_name]].to_dict()[self.identifier_name]