pybis.py

                else:
                    raise ValueError(
                        "There is more than one entry for entity={} and type={}".format(
                            entity, type_name
                        )
                    )

            types = []
            attrs = self._get_attributes(
                type_name, types, additional_attributes, optional_attributes
            )
            objects = response["objects"]
            if len(objects) == 0:
                types = DataFrame(columns=attrs)
            else:
                parse_jackson(objects)
                types = DataFrame(objects)
                types["modificationDate"] = types["modificationDate"].map(
                    format_timestamp
                )
            return types[attrs]

        return Things(
            openbis_obj=self,
            entity=entity.lower() + "_type",
            start_with=start_with,
            count=count,
            totalCount=resp.get("totalCount"),
            response=resp,
            df_initializer=create_data_frame,
        )

    def _get_attributes(
        self, type_name, types, additional_attributes, optional_attributes
    ):
        attributes = ["code", "description"] + additional_attributes
        attributes += [
            attribute for attribute in optional_attributes if attribute in types
        ]
        attributes += ["modificationDate"]
        if type_name is not None:
            attributes += ["propertyAssignments"]
        return attributes

    def is_session_active(self):
        """Check the session belonging to ``self.token``.

        Hands the current token to ``is_token_valid`` and returns that
        method's result unchanged.
        """
        current_token = self.token
        return self.is_token_valid(current_token)

    def is_token_valid(self, token=None):
        """Ask the server whether a session token is still active.

        Useful to find out whether a token has timed out and the user has to
        log in again.

        :param token: token to check; falls back to ``self.token`` when None.
        :return: False when no token is available or the request raises;
            otherwise whatever the ``isSessionActive`` request returns.
        """
        candidate = self.token if token is None else token
        if candidate is None:
            return False

        payload = {
            "method": "isSessionActive",
            "params": [candidate],
        }
        try:
            # Any failure of the request is treated as "token not valid".
            return self._post_request(self.as_v1, payload)
        except Exception:
            return False

    def set_token(self, token, save_token=True):
        """Validate a token, make it the current token and optionally persist it.

        :param token: the session token to use from now on.
        :param save_token: when true (the default) the token is handed to
            ``_save_token_to_disk`` (documented as writing to the ``~/.pybis``
            directory); when false nothing is written.
        :raises ValueError: if ``is_token_valid`` rejects the token.
        """
        if not self.is_token_valid(token):
            raise ValueError("session token seems not to be valid.")
        self.token = token

        # Respect the caller's choice: the flag used to be ignored and the
        # token was written to disk unconditionally.
        if save_token:
            self._save_token_to_disk(token=token)

        # TODO: find out what this is good for
        if os.environ.get("OPENBIS_URL") == self.url:
            os.environ["OPENBIS_TOKEN"] = self.token

    def get_dataset(self, permIds, only_data=False, props=None, **kvals):
        """Fetch one dataset, or a list of datasets, by permId.

        The request asks for tags, properties, dataStore, physicalData,
        linkedData, experiment, sample, registrator and modifier.

        :param permIds: a single permId or a list of permIds.
        :param only_data: for a single permId, return the raw record instead
            of a DataSet object.
        :param props: forwarded to ``_dataset_list_for_response`` when a list
            of permIds was given.
        :return: a DataSet object (or raw record) for a single permId, the
            result of ``_dataset_list_for_response`` for a list.
        :raises ValueError: if a single permId was given and nothing was found.
        """

        just_one = not isinstance(permIds, list)
        requested = [permIds] if just_one else permIds
        identifiers = [_type_for_id(permId, "dataset") for permId in requested]

        # NOTE: this is the shared fetch_option entry, not a copy.
        fetchopts = fetch_option["dataSet"]
        for option in (
            "tags",
            "properties",
            "dataStore",
            "physicalData",
            "linkedData",
            "experiment",
            "sample",
            "registrator",
            "modifier",
        ):
            fetchopts[option] = fetch_option[option]

        request = {
            "method": "getDataSets",
            "params": [self.token, identifiers, fetchopts],
        }
        resp = self._post_request(self.as_v3, request)

        if not just_one:
            return self._dataset_list_for_response(
                response=list(resp.values()), props=props, parsed=False
            )

        if len(resp) == 0:
            raise ValueError("no such dataset found: {}".format(permIds))

        parse_jackson(resp)

        # Only the first entry of the response is used.
        for found_id in resp:
            record = resp[found_id]
            if only_data:
                return record
            return DataSet(
                openbis_obj=self,
                type=self.get_dataset_type(record["type"]["code"]),
                data=record,
            )

    def _dataset_list_for_response(
        self,
        response,
        attrs=None,
        props=None,
        start_with=None,
        count=None,
        totalCount=0,
        objects=None,
        parsed=False,
    ):
        """Return a Things object wrapping a collection of dataset records.

        :param response: the dataset records; run through ``parse_jackson``
            first unless ``parsed`` is true.
        :param attrs: extra attributes to show as columns after the default
            ones. Names containing ``project`` or ``space`` are resolved via
            the record's experiment, other dotted names as ``entity.attribute``.
        :param props: a property name, a list of property names, or ``"*"``
            for all properties; each becomes an upper-cased column.
        :param start_with: passed through to Things.
        :param count: passed through to Things.
        :param totalCount: passed through to Things.
        :param objects: value returned by the objects initializer given to Things.
        :param parsed: set to true if ``response`` was already parsed.
        """

        def extract_attribute(attribute_to_extract):
            # Returns a mapper that reads one key from a (possibly missing) dict.
            def return_attribute(obj):
                if obj is None:
                    return ""
                return obj.get(attribute_to_extract, "")

            return return_attribute

        if not parsed:
            parse_jackson(response)

        if attrs is None:
            attrs = []

        def extract_project(attr):
            # "project.<x>" -> read <x> of the experiment's project;
            # a bare "project" -> the project's identifier.
            _, _, attr = attr.partition(".")

            def extract_attr(obj):
                try:
                    if attr:
                        return obj["project"][attr]
                    else:
                        return obj["project"]["identifier"]["identifier"]
                except KeyError:
                    return ""

            return extract_attr

        def extract_space(attr):
            # "space.<x>" -> read <x> of the project's space;
            # a bare "space" -> the space's code.
            _, _, attr = attr.partition(".")

            def extract_attr(obj):
                try:
                    if attr:
                        return obj["project"]["space"][attr]
                    else:
                        return obj["project"]["space"]["code"]
                except KeyError:
                    return ""

            return extract_attr

        def create_data_frame(attrs, props, response):
            """Build the DataFrame holding the default, requested and property columns."""
            # Same guard as in _sample_list_for_response: tolerate attrs=None.
            if attrs is None:
                attrs = []
            default_attrs = [
                "permId",
                "type",
                "experiment",
                "sample",
                "registrationDate",
                "modificationDate",
                "location",
                "status",
                "presentInArchive",
                "size",
            ]
            display_attrs = default_attrs + attrs

            if props is None:
                props = []
            else:
                if isinstance(props, str):
                    props = [props]

            if len(response) == 0:
                # Nothing found: return an empty frame that still has the
                # expected columns ("*" cannot be expanded without data).
                for prop in props:
                    if prop == "*":
                        continue
                    display_attrs.append(prop)
                datasets = DataFrame(columns=display_attrs)
            else:
                datasets = DataFrame(response)
                for attr in attrs:
                    # substring checks: any attr containing "project"/"space"
                    # is resolved through the experiment column
                    if "project" in attr:
                        datasets[attr] = datasets["experiment"].map(
                            extract_project(attr)
                        )
                    elif "space" in attr:
                        datasets[attr] = datasets["experiment"].map(extract_space(attr))
                    elif "." in attr:
                        entity, attribute_to_extract = attr.split(".")
                        datasets[attr] = datasets[entity].map(
                            extract_attribute(attribute_to_extract)
                        )
                for attr in attrs:
                    # if no dot supplied, just display the code of the space, project or experiment
                    # NOTE(review): "experiment" and "sample" are mapped again
                    # unconditionally further down -- confirm the double
                    # mapping is harmless.
                    if any(entity == attr for entity in ["experiment", "sample"]):
                        datasets[attr] = datasets[attr].map(extract_nested_identifier)

                datasets["registrationDate"] = datasets["registrationDate"].map(
                    format_timestamp
                )
                datasets["modificationDate"] = datasets["modificationDate"].map(
                    format_timestamp
                )
                datasets["experiment"] = datasets["experiment"].map(
                    extract_nested_identifier
                )
                datasets["sample"] = datasets["sample"].map(extract_nested_identifier)
                datasets["type"] = datasets["type"].map(extract_code)
                datasets["permId"] = datasets["code"]
                for column in ["parents", "children", "components", "containers"]:
                    if column in datasets:
                        datasets[column] = datasets[column].map(extract_identifiers)
                # The following columns are taken from physicalData, which may be missing.
                datasets["size"] = datasets["physicalData"].map(
                    lambda x: x.get("size") if x else ""
                )
                datasets["status"] = datasets["physicalData"].map(
                    lambda x: x.get("status") if x else ""
                )
                datasets["presentInArchive"] = datasets["physicalData"].map(
                    lambda x: x.get("presentInArchive") if x else ""
                )
                datasets["location"] = datasets["physicalData"].map(
                    lambda x: x.get("location") if x else ""
                )

                for prop in props:
                    if prop == "*":
                        # include all properties in dataFrame.
                        # expand the dataFrame by adding new columns
                        columns = []
                        for i, dataSet in enumerate(response):
                            for prop_name, val in dataSet.get("properties", {}).items():
                                datasets.loc[i, prop_name.upper()] = val
                                columns.append(prop_name.upper())

                        # De-duplicate while keeping first-seen order; a set
                        # would yield a column order that varies between runs.
                        display_attrs += list(dict.fromkeys(columns))
                        continue

                    else:
                        # property name is provided
                        for i, dataSet in enumerate(response):
                            val = dataSet.get("properties", {}).get(
                                prop, ""
                            ) or dataSet.get("properties", {}).get(prop.upper(), "")
                            datasets.loc[i, prop.upper()] = val
                        display_attrs.append(prop.upper())
            return datasets[display_attrs]

        def create_objects(response):
            # The objects are supplied by the caller; the response is not used.
            return objects

        return Things(
            openbis_obj=self,
            entity="dataset",
            identifier_name="permId",
            start_with=start_with,
            count=count,
            totalCount=totalCount,
            attrs=attrs,
            props=props,
            response=response,
            df_initializer=create_data_frame,
            objects_initializer=create_objects,
        )

    def get_sample(
        self, sample_ident, only_data=False, withAttachments=False, props=None, **kvals
    ):
        """Retrieve one sample, or a list of samples.

        The request asks for tags, properties, attachments, space, experiment,
        registrator, modifier and dataSets, plus the project when the server
        reports ``project_samples_enabled``. Parents, children, container and
        components are requested with plain sample fetch options.

        :param sample_ident: a single sample identifier or a list of them.
        :param only_data: for a single identifier, return the raw record
            instead of a Sample object.
        :param withAttachments: request the attachments including their content.
        :param props: forwarded to ``_sample_list_for_response`` when a list
            was given.
        :raises ValueError: if a single identifier was given and nothing was found.
        """

        only_one = not isinstance(sample_ident, list)
        wanted = [sample_ident] if only_one else sample_ident
        identifiers = [_type_for_id(ident, "sample") for ident in wanted]

        # NOTE: this is the shared fetch_option entry, not a copy.
        fetchopts = fetch_option["sample"]
        options = [
            "tags",
            "properties",
            "attachments",
            "space",
            "experiment",
            "registrator",
            "modifier",
            "dataSets",
        ]
        if self.get_server_information().project_samples_enabled:
            options.append("project")
        for option in options:
            fetchopts[option] = fetch_option[option]

        if withAttachments:
            fetchopts["attachments"] = fetch_option["attachmentsWithContent"]

        for relation in ("parents", "children", "container", "components"):
            fetchopts[relation] = {
                "@type": "as.dto.sample.fetchoptions.SampleFetchOptions"
            }

        request = {
            "method": "getSamples",
            "params": [self.token, identifiers, fetchopts],
        }
        resp = self._post_request(self.as_v3, request)

        if not only_one:
            return self._sample_list_for_response(
                response=list(resp.values()), props=props, parsed=False
            )

        if len(resp) == 0:
            raise ValueError("no such sample found: {}".format(sample_ident))

        parse_jackson(resp)
        # Only the first entry of the response is used.
        for found_ident in resp:
            record = resp[found_ident]
            if only_data:
                return record
            return Sample(
                openbis_obj=self,
                type=self.get_sample_type(record["type"]["code"]),
                data=record,
            )

    def _sample_list_for_response(
        self,
        response,
        attrs=None,
        props=None,
        start_with=None,
        count=None,
        totalCount=0,
        parsed=False,
    ):
        """Return a Things object wrapping a collection of sample records.

        :param response: the sample records; run through ``parse_jackson``
            first unless ``parsed`` is true.
        :param attrs: extra attributes to show as columns after the default
            ones; dotted names are resolved as ``entity.attribute``.
        :param props: a property name, a list of property names, or ``"*"``
            for all properties; each becomes an upper-cased column.
        :param start_with: passed through to Things.
        :param count: passed through to Things.
        :param totalCount: passed through to Things.
        :param parsed: set to true if ``response`` was already parsed.
        """
        logger = logging.getLogger("_sample_list_for_response")
        logger.setLevel(logging.CRITICAL)
        logger.disabled = True
        # getLogger() hands back the same logger for the same name, so attach
        # the stream handler only once; adding one per call made the handler
        # list grow without bound.
        if not logger.handlers:
            logger.addHandler(logging.StreamHandler(sys.stdout))

        time1 = now()

        logger.debug("_sample_list_for_response before parsing JSON")
        if not parsed:
            parse_jackson(response)

        time2 = now()

        logger.debug(f"_sample_list_for_response got response. Delay: {time2 - time1}")

        time6 = now()
        logger.debug("_sample_list_for_response computing result.")

        def create_data_frame(attrs, props, response):
            """Build the DataFrame holding the default, requested and property columns."""

            def extract_attribute(attribute_to_extract):
                # Returns a mapper that reads one key from a (possibly missing) dict.
                def return_attribute(obj):
                    if obj is None:
                        return ""
                    return obj.get(attribute_to_extract, "")

                return return_attribute

            logger = logging.getLogger("create_data_frame")
            logger.setLevel(logging.CRITICAL)
            # Same as above: never attach more than one handler to this logger.
            if not logger.handlers:
                logger.addHandler(logging.StreamHandler(sys.stdout))

            time2 = now()

            if attrs is None:
                attrs = []
            default_attrs = [
                "identifier",
                "permId",
                "type",
                "registrator",
                "registrationDate",
                "modifier",
                "modificationDate",
            ]
            display_attrs = default_attrs + attrs
            if props is None:
                props = []
            else:
                if isinstance(props, str):
                    props = [props]
            if len(response) == 0:
                # Nothing found: return an empty frame that still has the
                # expected columns ("*" cannot be expanded without data).
                for prop in props:
                    if prop == "*":
                        continue
                    display_attrs.append(prop)
                samples = DataFrame(columns=display_attrs)
            else:
                time3 = now()
                logger.debug(
                    f"createDataFrame computing attributes. Delay: {time3 - time2}"
                )

                samples = DataFrame(response)
                for attr in attrs:
                    if "." in attr:
                        entity, attribute_to_extract = attr.split(".")
                        samples[attr] = samples[entity].map(
                            extract_attribute(attribute_to_extract)
                        )
                    # if no dot supplied, just display the code of the space, project or experiment
                    elif attr in ["project", "experiment"]:
                        samples[attr] = samples[attr].map(extract_nested_identifier)
                    elif attr in ["space"]:
                        samples[attr] = samples[attr].map(extract_code)

                samples["registrationDate"] = samples["registrationDate"].map(
                    format_timestamp
                )
                samples["modificationDate"] = samples["modificationDate"].map(
                    format_timestamp
                )
                samples["registrator"] = samples["registrator"].map(extract_person)
                samples["modifier"] = samples["modifier"].map(extract_person)
                samples["identifier"] = samples["identifier"].map(extract_identifier)
                samples["container"] = samples["container"].map(
                    extract_nested_identifier
                )
                for column in ["parents", "children", "components"]:
                    if column in samples:
                        samples[column] = samples[column].map(extract_identifiers)
                samples["permId"] = samples["permId"].map(extract_permid)
                samples["type"] = samples["type"].map(extract_nested_permid)

                time4 = now()
                logger.debug(
                    f"_sample_list_for_response computed attributes. Delay: {time4 - time3}"
                )

                for prop in props:
                    if prop == "*":
                        # include all properties in dataFrame.
                        # expand the dataFrame by adding new columns
                        columns = []
                        for i, sample in enumerate(response):
                            for prop_name, val in sample.get("properties", {}).items():
                                samples.loc[i, prop_name.upper()] = val
                                columns.append(prop_name.upper())

                        # De-duplicate while keeping first-seen order; a set
                        # would yield a column order that varies between runs.
                        display_attrs += list(dict.fromkeys(columns))
                        continue
                    else:
                        # property name is provided
                        for i, sample in enumerate(response):
                            if "properties" in sample:
                                properties = sample["properties"]
                                val = properties.get(prop, "") or properties.get(
                                    prop.upper(), ""
                                )
                                samples.loc[i, prop.upper()] = val
                            else:
                                samples.loc[i, prop.upper()] = ""
                        display_attrs.append(prop.upper())

                time5 = now()
                logger.debug(
                    f"_sample_list_for_response computed properties. Delay: {time5 - time4}"
                )
            return samples[display_attrs]

        def create_objects(response):
            # One Sample object per record; the sample type is looked up per record.
            return list(
                map(
                    lambda obj: Sample(
                        openbis_obj=self,
                        type=self.get_sample_type(obj["type"]["code"]),
                        data=obj,
                    ),
                    response,
                )
            )

        result = Things(
            openbis_obj=self,
            entity="sample",
            identifier_name="identifier",
            start_with=start_with,
            count=count,
            totalCount=totalCount,
            response=response,
            df_initializer=create_data_frame,
            objects_initializer=create_objects,
            attrs=attrs,
            props=props,
        )

        time7 = now()
        logger.debug(
            f"_sample_list_for_response computed result. Delay: {time7 - time6}"
        )
        return result

    @staticmethod
    def decode_attribute(entity, attribute):
        """Split an attribute expression into entity, attribute and alias.

        Accepted forms are ``entity``, ``entity.attribute`` and either of them
        followed by ``AS alias`` (case-insensitive). ``object`` is translated
        to ``sample`` and ``collection`` to ``experiment``; a missing attribute
        defaults to ``code``; a missing alias defaults to the expression itself.

        :param entity: not used by this method.
        :param attribute: the expression to decode.
        :return: dict with the keys ``attribute``, ``entity`` and ``alias``.
        :raises ValueError: if the expression cannot be parsed. This used to
            surface as an AttributeError on ``None``, which the fallback in
            ``_decode_property`` did not catch.
        """
        attribute, *alias = re.split(r"\s+AS\s+", attribute, flags=re.IGNORECASE)
        alias = alias[0] if alias else attribute

        regex = re.compile(
            r"""^                         # beginning of the string
                (?P<requested_entity>\w+) # the entity itself
                (\.(?P<attribute>\w+))?   # capture an optional .attribute
                $                         # end of string
        """,
            re.X,
        )
        match = regex.search(attribute)
        if match is None:
            raise ValueError(f"unable to parse attribute: {attribute}")
        params = match.groupdict()

        # Translate the user-facing synonyms; every other name is kept as is.
        requested_entity = params.pop("requested_entity")
        synonyms = {"object": "sample", "collection": "experiment"}
        params["entity"] = synonyms.get(requested_entity, requested_entity)

        if not params["attribute"]:
            params["attribute"] = "code"
        params["alias"] = alias

        return params

    def _decode_property(self, entity, property):
        # match something like: property_name.term.label AS label_alias
        regex = re.compile(
            r"""^
                (?P<alias_alternative>
                (?P<property>[^\.]*  )
                (?:
                    \.
                    (?P<subentity>term|pa) \.
                    (?P<field>code|vocabularyCode|label|description|ordinal|dataType)
                )?
                )
                (
                \s+(?i)AS\s+
                (?P<alias>\w+)
                )?
                \s*
                $
            """,
            re.X,
        )
        match = re.search(regex, property)
        if not match:
            try:
                params = self.decode_attribute(entity, property)
                return params
            except ValueError:
                raise ValueError(f"unable to parse property: {property}")
        params = match.groupdict()
        if not params["alias"]:
            params["alias"] = params["alias_alternative"]

        return params

    get_object = get_sample  # Alias: the same method under the "object" name

    def get_external_data_management_systems(
        self, start_with=None, count=None, only_data=False
    ):
        """Search for external data management systems.

        :param start_with: passed through to Things.
        :param count: passed through to Things.
        :param only_data: not used by this method.
        :return: a Things object whose DataFrame shows the columns code,
            label, address, addressType, urlTemplate and openbis.
        """
        entity = "externalDms"

        criteria = get_type_for_entity(entity, "search")
        fetchopts = get_fetchoption_for_entity(entity)
        response = self._post_request(
            self.as_v3,
            {
                "method": "searchExternalDataManagementSystems",
                "params": [self.token, criteria, fetchopts],
            },
        )

        def create_data_frame(attrs, props, response):
            # The attrs/props arguments are ignored; the columns are fixed.
            parse_jackson(response)
            columns = [
                "code",
                "label",
                "address",
                "addressType",
                "urlTemplate",
                "openbis",
            ]

            records = response["objects"]
            if len(records) == 0:
                return DataFrame(columns=columns)[columns]

            parse_jackson(records)
            frame = DataFrame(records)
            frame["permId"] = frame["permId"].map(extract_permid)
            return frame[columns]

        return Things(
            openbis_obj=self,
            entity="externalDms",
            identifier_name="permId",
            start_with=start_with,
            count=count,
            totalCount=response.get("totalCount"),
            response=response,
            df_initializer=create_data_frame,
        )

    def get_external_data_management_system(self, permId, only_data=False):
        """Fetch a single external data management system by its permId.

        :param permId: A permId for an external DMS.
        :param only_data: Return the raw record instead of an ExternalDMS object.
        :raises ValueError: if the server returns nothing for this permId.
        """

        dms_id = {
            "@type": "as.dto.externaldms.id.ExternalDmsPermId",
            "permId": permId,
        }
        fetch_options = {
            "@type": "as.dto.externaldms.fetchoptions.ExternalDmsFetchOptions",
        }
        request = {
            "method": "getExternalDataManagementSystems",
            "params": [self.token, [dms_id], fetch_options],
        }

        resp = self._post_request(self.as_v3, request)
        parse_jackson(resp)

        if resp is None or len(resp) == 0:
            raise ValueError("no such external DMS found: " + permId)

        # Only the first entry of the response is used.
        for ident in resp:
            record = resp[ident]
            if only_data:
                return record
            return ExternalDMS(self, record)

    get_externalDms = get_external_data_management_system  # alias: shorter name for the same method

    def new_space(self, **kwargs):
        """Create a Space object for a new space.

        All keyword arguments are forwarded to the Space constructor.
        """
        space = Space(self, None, **kwargs)
        return space

    def new_git_data_set(
        self,
        data_set_type,
        path,
        commit_id,
        repository_id,
        dms,
        sample=None,
        experiment=None,
        properties=None,
        dss_code=None,
        parents=None,
        data_set_code=None,
        contents=None,
    ):
        """Create a link data set.
        :param data_set_type: The type of the data set
        :param path: The path to the git repository
        :param commit_id: The git commit id
        :param repository_id: The git repository id - same for copies
        :param dms: An external data managment system object or external_dms_id
        :param sample: A sample object or sample id.
        :param experiment: An experiment object or experiment id.
        :param properties: Properties for the data set; defaults to an empty dict.
        :param dss_code: Code for the DSS -- defaults to the first dss if none is supplied.
        :param parents: Parents for the data set.
        :param data_set_code: A data set code -- used if provided, otherwise generated on the server
        :param contents: A list of dicts that describe the contents; defaults to an empty list:
            {'file_length': [file length],
             'crc32': [crc32 checksum],
             'directory': [is path a directory?]
             'path': [the relative path string]}
        :return: A DataSet object
        """
        # Use a fresh container per call: the former mutable defaults ({} and
        # []) were shared between all calls that relied on them.
        if properties is None:
            properties = {}
        if contents is None:
            contents = []

        return pbds.GitDataSetCreation(
            self,
            data_set_type,
            path,
            commit_id,
            repository_id,
            dms,
            sample,
            experiment,
            properties,
            dss_code,
            parents,
            data_set_code,
            contents,
        ).new_git_data_set()

    def new_content_copy(self, path, commit_id, repository_id, edms_id, data_set_id):
        """
        Add a content copy to an existing link data set.
        :param path: path of the new content copy
        :param commit_id: commit id of the new content copy
        :param repository_id: repository id of the content copy
        :param edms_id: Id of the external data managment system of the content copy
        :param data_set_id: Id of the data set to which the new content copy belongs
        """
        updater = pbds.GitDataSetUpdate(self, data_set_id)
        return updater.new_content_copy(path, commit_id, repository_id, edms_id)

    def search_files(self, data_set_id, dss_code=None):
        """Search the files of a data set via ``pbds.GitDataSetFileSearch``.

        :param data_set_id: Id of the data set to search in
        :param dss_code: not used by this method
        """
        file_search = pbds.GitDataSetFileSearch(self, data_set_id)
        return file_search.search_files()

    def delete_content_copy(self, data_set_id, content_copy):
        """
        Remove a content copy from a data set.
        :param data_set_id: Id of the data set containing the content copy
        :param content_copy: The content copy to be deleted
        """
        updater = pbds.GitDataSetUpdate(self, data_set_id)
        return updater.delete_content_copy(content_copy)

    @staticmethod
    def sample_to_sample_id(sample):
        """Turn a sample, given as a string or as an object, into an id dict."""
        identifier_type = "as.dto.sample.id.SampleIdentifier"
        perm_id_type = "as.dto.sample.id.SamplePermId"
        return Openbis._object_to_object_id(sample, identifier_type, perm_id_type)

    @staticmethod
    def experiment_to_experiment_id(experiment):
        """Take experiment which may be a string or object and return an identifier for it.

        Strings that are not identifiers are treated as permIds and tagged as
        an experiment permId (this used to be the non-existent type
        ``as.dto.experiment.id.SamplePermId``).
        """
        return Openbis._object_to_object_id(
            experiment,
            "as.dto.experiment.id.ExperimentIdentifier",
            "as.dto.experiment.id.ExperimentPermId",
        )

    @staticmethod
    def _object_to_object_id(obj, identifierType, permIdType):
        object_id = None
        if isinstance(obj, str):
            if is_identifier(obj):
                object_id = {"identifier": obj, "@type": identifierType}
            else:
                object_id = {"permId": obj, "@type": permIdType}
        else:
            object_id = {"identifier": obj.identifier, "@type": identifierType}
        return object_id

    @staticmethod
    def data_set_to_data_set_id(data_set):
        """Build a data set permId dict from a permId string or from an object
        that carries a ``permId`` attribute."""
        code = data_set if isinstance(data_set, str) else data_set.permId
        return {"permId": code, "@type": "as.dto.dataset.id.DataSetPermId"}

    def external_data_managment_system_to_dms_id(self, dms):
        """Build an id dict for an external DMS given as a string or an object.

        A string is used as the permId; for any other object its ``code``
        attribute is used as the identifier.
        """
        if isinstance(dms, str):
            return {
                "permId": dms,
                "@type": "as.dto.externaldms.id.ExternalDmsPermId",
            }
        # NOTE(review): the object branch is tagged with a sample identifier
        # type -- confirm that this is intended.
        return {
            "identifier": dms.code,
            "@type": "as.dto.sample.id.SampleIdentifier",
        }

    def new_sample(self, type, project=None, props=None, **kwargs):
        """Create a Sample object of the given sample type.
        type         -- sampleType code or object: mandatory
        code         -- name/code for the sample, if not generated automatically
        space        -- space code or object
        project      -- project code or object
        experiment   -- experiment code or object
        collection   -- same as above
        props        -- a dictionary containing the properties
        """
        # "collection" is accepted as another name for "experiment".
        if "collection" in kwargs:
            kwargs["experiment"] = kwargs.pop("collection")

        # A type given as a string is looked up; anything else is used as is.
        sample_type = self.get_sample_type(type) if isinstance(type, str) else type
        return Sample(
            self, type=sample_type, project=project, data=None, props=props, **kwargs
        )

    new_object = new_sample  # Alias: the same method under the "object" name

    def new_transaction(self, *entities):
        """Create a Transaction from the given entities."""
        transaction = Transaction(*entities)
        return transaction

    def new_sample_type(
        self,
        code,
        generatedCodePrefix,
        subcodeUnique=False,
        autoGeneratedCode=False,
        listable=True,
        showContainer=False,
        showParents=True,
        showParentMetadata=False,
        validationPlugin=None,
    ):
        """Create a SampleType object for a new sample type.

        All arguments except ``subcodeUnique`` are forwarded to SampleType,
        together with ``self.get_sample_type`` as ``method``.
        """
        # NOTE(review): subcodeUnique is accepted but never passed on --
        # confirm whether that is intentional.
        settings = {
            "code": code,
            "generatedCodePrefix": generatedCodePrefix,
            "autoGeneratedCode": autoGeneratedCode,
            "listable": listable,
            "showContainer": showContainer,
            "showParents": showParents,
            "showParentMetadata": showParentMetadata,
            "validationPlugin": validationPlugin,
            "method": self.get_sample_type,
        }
        return SampleType(self, **settings)

    new_object_type = new_sample_type  # Alias: the same method under the "object" name

    def new_dataset_type(
        self,
        code,
        description=None,
        mainDataSetPattern=None,
        mainDataSetPath=None,
        disallowDeletion=False,
        validationPlugin=None,
    ):
        """Create a DataSetType object for a new dataSet type.

        All arguments are forwarded to DataSetType, together with
        ``self.get_dataset_type`` as ``method``.
        """
        settings = {
            "code": code,
            "description": description,
            "mainDataSetPattern": mainDataSetPattern,
            "mainDataSetPath": mainDataSetPath,
            "disallowDeletion": disallowDeletion,
            "validationPlugin": validationPlugin,
            "method": self.get_dataset_type,
        }
        return DataSetType(self, **settings)

    def new_experiment_type(
        self,
        code,
        description=None,
        validationPlugin=None,
    ):
        """Create an ExperimentType object for a new experiment type (collection type).

        All arguments are forwarded to ExperimentType, together with
        ``self.get_experiment_type`` as ``method``.
        """
        settings = {
            "code": code,
            "description": description,
            "validationPlugin": validationPlugin,
            "method": self.get_experiment_type,
        }
        return ExperimentType(self, **settings)

    new_collection_type = new_experiment_type  # Alias: the same method under the "collection" name

    def new_material_type(
        self,
        code,
        description=None,
        validationPlugin=None,
    ):
        """Create a MaterialType object for a new material type.

        All arguments are forwarded to MaterialType, together with
        ``self.get_material_type`` as ``method``.
        """
        settings = {
            "code": code,
            "description": description,
            "validationPlugin": validationPlugin,
            "method": self.get_material_type,
        }
        return MaterialType(self, **settings)

    def new_dataset(
        self,
        type=None,
        kind="PHYSICAL_DATA",
        files=None,
        file=None,
        props=None,
        folder=None,
        **kwargs,
    ):
        """Create a DataSet object of the given dataSet type.

        type         -- dataSet type code or object: mandatory
        kind         -- forwarded to DataSet, defaults to PHYSICAL_DATA
        file         -- a single path; when given it replaces ``files``
        files        -- list of paths
        folder       -- forwarded to DataSet
        props        -- a dictionary containing the properties
        sample       -- sample code or object
        object       -- same as above
        experiment   -- experiment code or object
        collection   -- same as above

        Any further keyword arguments are forwarded to DataSet unchanged.
        Raises ValueError when no type is given.
        """

        if type is None:
            raise ValueError("Please provide a dataSet type")

        if file:
            files = [file]

        # A type given as a string is upper-cased and looked up.
        type_obj = self.get_dataset_type(type.upper()) if isinstance(type, str) else type

        # Translate the alternative names into the ones DataSet is called with.
        for alias, target in (("object", "sample"), ("collection", "experiment")):
            if alias in kwargs:
                kwargs[target] = kwargs.pop(alias)

        return DataSet(
            self,
            type=type_obj,
            kind=kind,
            files=files,
            folder=folder,
            props=props,
            **kwargs,
        )

    def new_semantic_annotation(self, entityType=None, propertyType=None, **kwargs):
        """Note: not functional yet."""
        return SemanticAnnotation(
            openbis_obj=self,