Skip to content
Snippets Groups Projects
dataset.py 52.6 KiB
Newer Older
  • Learn to ignore specific revisions
  • #   Copyright ETH 2018 - 2023 Zürich, Scientific IT Services
    # 
    #   Licensed under the Apache License, Version 2.0 (the "License");
    #   you may not use this file except in compliance with the License.
    #   You may obtain a copy of the License at
    # 
    #        http://www.apache.org/licenses/LICENSE-2.0
    #   
    #   Unless required by applicable law or agreed to in writing, software
    #   distributed under the License is distributed on an "AS IS" BASIS,
    #   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    #   See the License for the specific language governing permissions and
    #   limitations under the License.
    #
    
    import random
    import time
    import urllib.parse
    
    Andrei Plamada's avatar
    Andrei Plamada committed
    from functools import partialmethod
    from pathlib import Path
    
    Andrei Plamada's avatar
    Andrei Plamada committed
    from typing import Set, Optional, List
    
    from urllib.parse import urljoin, quote
    
    import requests
    from pandas import DataFrame
    from requests import Session
    
    Swen Vermeul's avatar
    Swen Vermeul committed
    from .definitions import (
        openbis_definitions,
        get_type_for_entity,
        get_fetchoption_for_entity,
    )
    
    from .fast_download import FastDownload
    from .openbis_object import OpenBisObject
    from .things import Things
    
    Swen Vermeul's avatar
    Swen Vermeul committed
    from .utils import (
        VERBOSE,
        parse_jackson,
        extract_permid,
        extract_code,
        extract_downloadUrl,
    )
    
    PYBIS_PLUGIN = "dataset-uploader-api"
    
    Swen Vermeul's avatar
    Swen Vermeul committed
    dataset_definitions = openbis_definitions("dataSet")
    dss_endpoint = "/datastore_server/rmi-data-store-server-v3.json"
    
    def signed_to_unsigned(sig_int):
        """openBIS delivers crc32 checksums as signed integers.
        If the number is negative, we just have to add 2**32
        We display the hex number to match with the classic UI
        """
        if sig_int < 0:
    
        return "%x" % (sig_int & 0xFFFFFFFF)
    
    
    
    class DataSet(
        OpenBisObject,
    
    Swen Vermeul's avatar
    Swen Vermeul committed
        entity="dataSet",
        single_item_method_name="get_dataset",
    
    Swen Vermeul's avatar
    Swen Vermeul committed
        """DataSet are openBIS objects that contain the actual files."""
    
        def __init__(
    
                self,
                openbis_obj,
                type,
                data=None,
                files=None,
                zipfile=None,
                folder=None,
                kind=None,
                props=None,
                **kwargs,
    
                if files is None and zipfile is None:
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                    raise ValueError("please provide at least one file")
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                    raise ValueError(
                        "please provide either a list of files or a single zipfile"
                    )
    
                if zipfile:
                    files = [zipfile]
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                    self.__dict__["isZipDirectoryUpload"] = True
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                    self.__dict__["isZipDirectoryUpload"] = False
    
    
                if files:
                    if isinstance(files, str):
                        files = [files]
    
                    for file in files:
                        if not os.path.exists(file):
    
                            raise ValueError(f"File {file} does not exist")
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                    self.__dict__["files"] = files
    
            # initialize the OpenBisObject
    
            super().__init__(openbis_obj, type=type, data=data, props=props, **kwargs)
    
    Swen Vermeul's avatar
    Swen Vermeul committed
            self.__dict__["files_in_wsp"] = []
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                if data["physicalData"] is None:
                    self.__dict__["shareId"] = None
                    self.__dict__["location"] = None
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                    self.__dict__["shareId"] = data["physicalData"]["shareId"]
                    self.__dict__["location"] = data["physicalData"]["location"]
    
    
            if kind is not None:
                kind = kind.upper()
    
                allowed_kinds = ["PHYSICAL", "CONTAINER", "LINK"]
    
                if kind not in allowed_kinds:
                    raise ValueError(
    
                        f"only these values are allowed for kind: {allowed_kinds}"
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                self.a.__dict__["_kind"] = kind
    
    Swen Vermeul's avatar
    Swen Vermeul committed
            self.__dict__["folder"] = folder
    
    Swen Vermeul's avatar
    Swen Vermeul committed
            if getattr(self, "parents") is None:
                self.a.__dict__["_parents"] = []
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                    self.a.__dict__["_parents_orig"] = self.a.__dict__["_parents"]
    
    Swen Vermeul's avatar
    Swen Vermeul committed
            if getattr(self, "children") is None:
                self.a.__dict__["_children"] = []
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                    self.a.__dict__["_children_orig"] = self.a.__dict__["_children"]
    
    Swen Vermeul's avatar
    Swen Vermeul committed
            if getattr(self, "container") is None:
                self.a.__dict__["_container"] = []
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                    self.a.__dict__["_container_orig"] = self.a.__dict__["_container"]
    
    Swen Vermeul's avatar
    Swen Vermeul committed
            if getattr(self, "component") is None:
                self.a.__dict__["_component"] = []
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                    self.a.__dict__["_component_orig"] = self.a.__dict__["_component"]
    
    Swen Vermeul's avatar
    Swen Vermeul committed
            return self.data["code"]
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                "get_parents()",
                "get_children()",
                "get_components()",
                "get_contained()",
                "get_containers()",
                "add_parents()",
                "add_children()",
                "add_components()",
                "add_contained()",
                "add_containers()",
                "del_parents()",
                "del_children()",
                "del_components()",
                "del_contained()",
                "del_containers()",
                "set_parents()",
                "set_children()",
                "set_components()",
                "set_contained()",
                "set_containers()",
                "set_tags()",
                "add_tags()",
                "del_tags()",
                "add_attachment()",
                "get_attachments()",
                "download_attachments()",
                "get_files()",
                "file_list",
                "file_links",
                "rel_file_links",
                "physicalData",
                "download()",
                "download_path",
                "is_physical()",
                "symlink()",
                "is_symlink()",
                "archive()",
                "unarchive()",
                "save()",
                "delete()",
                "mark_to_be_deleted()",
                "unmark_to_be_deleted()",
                "is_marked_to_be_deleted()",
                "attrs",
                "props",
    
    Swen Vermeul's avatar
    Swen Vermeul committed
            ] + super().__dir__()
    
    Swen Vermeul's avatar
    Swen Vermeul committed
            if name in ["folder"]:
    
    Swen Vermeul's avatar
    Swen Vermeul committed
            elif name in ["p", "props"]:
    
                if isinstance(value, dict):
                    for p in value:
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                        setattr(self.__dict__["p"], p, value[p])
    
                else:
                    raise ValueError("please provide a dictionary for setting properties")
    
            else:
                super(DataSet, self).__setattr__(name, value)
    
        @property
        def props(self):
    
    Swen Vermeul's avatar
    Swen Vermeul committed
            return self.__dict__["p"]
    
    Swen Vermeul's avatar
    Swen Vermeul committed
            return self.__dict__["type"]
    
    
        @type.setter
        def type(self, type_name):
            dataset_type = self.openbis.get_dataset_type(type_name.upper())
    
    Swen Vermeul's avatar
    Swen Vermeul committed
            self.p.__dict__["_type"] = dataset_type
            self.a.__dict__["_type"] = dataset_type
    
    Swen Vermeul's avatar
    Swen Vermeul committed
            if "physicalData" in self.data:
                return PhysicalData(data=self.data["physicalData"])
    
    Swen Vermeul's avatar
    Swen Vermeul committed
            if "linkedData" in self.data:
                return LinkedData(data=self.data["linkedData"])
    
        @property
        def status(self):
            ds = self.openbis.get_dataset(self.permId)
    
    Swen Vermeul's avatar
    Swen Vermeul committed
            self.data["physicalData"] = ds.data["physicalData"]
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                return self.data["physicalData"]["status"]
    
        @property
        def download_path(self):
    
    Swen Vermeul's avatar
    Swen Vermeul committed
            """after ther physical datasets have been downloaded, this returns the relative path."""
            return self.__dict__.get("download_path", "")
    
    Andrei Plamada's avatar
    Andrei Plamada committed
    
        @property
    
        def _sftp_source_dir(self):
            """The SFTP directory is structured as follows:
            /SPACE/PROJECT/EXPERIMENT/permId
    
            For the current dataSet, this method returns the expected path
    
    Swen Vermeul's avatar
    Swen Vermeul committed
            """
    
            return os.path.join(self.experiment.identifier[1:], self.permId)
    
        def symlink(self, target_dir: str = None, replace_if_symlink_exists: bool = True):
            """replace_if_symlink_exists will replace the the target_dir
            in case it is an existing symlink
            Returns the absolute path of the symlink
    
    Swen Vermeul's avatar
    Swen Vermeul committed
            """
    
            if target_dir is None:
                target_dir = os.path.join(self.openbis.download_prefix, self.permId)
    
    
    Andrei Plamada's avatar
    Andrei Plamada committed
            target_dir_path = Path(target_dir)
            if target_dir_path.is_symlink() and replace_if_symlink_exists:
                target_dir_path.unlink()
    
    
            # create data/openbis-hostname
            os.makedirs(os.path.dirname(target_dir_path.absolute()), exist_ok=True)
    
            # make sure we got a mountpoint
            mountpoint_path = self.openbis.get_mountpoint()
            if mountpoint_path is None:
                try:
                    mountpoint_path = self.openbis.mount()
                except ValueError as err:
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                    if "password" in str(err):
                        raise ValueError(
                            "openBIS instance cannot be mounted, no symlink possible"
                        )
    
    
            # construct the absolute path of our sftp source
            sftp_source_path = os.path.join(mountpoint_path, self._sftp_source_dir)
    
            # make sure our sftp source is really available
            # create symlink
            if os.path.exists(sftp_source_path):
                target_dir_path.symlink_to(sftp_source_path, target_is_directory=True)
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                if VERBOSE:
                    print(f"Symlink created: {target_dir} --> {sftp_source_path}")
    
    
                return str(target_dir_path.absolute())
            else:
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                raise ValueError(
                    f"Source path {sftp_source_path} does not exist, cannot create symlink"
                )
    
    Andrei Plamada's avatar
    Andrei Plamada committed
    
        @staticmethod
        def _file_set(target_dir: str) -> Set[str]:
            target_dir_path = Path(target_dir)
            return set(
                str(el.relative_to(target_dir_path))
                for el in target_dir_path.glob("**/*")
                if el.is_file()
            )
    
        def _is_symlink_or_physical(
    
                self,
                what: str,
                target_dir: str = None,
                expected_file_list: Optional[List[str]] = None,
    
    Andrei Plamada's avatar
    Andrei Plamada committed
        ):
    
            if target_dir is None:
                target_dir = os.path.join(self.openbis.download_prefix, self.permId)
    
    Andrei Plamada's avatar
    Andrei Plamada committed
            target_dir_path = Path(target_dir)
    
            target_file_set = self._file_set(target_dir)
    
            if expected_file_list is None:
                source_file_set = set(self.file_list)
            else:
                source_file_set = set(expected_file_list)
    
            res = source_file_set.issubset(target_file_set)
            if not res:
                return res
            elif what == "symlink":
                return target_dir_path.exists() and target_dir_path.is_symlink()
            elif what == "physical":
                return target_dir_path.exists() and not target_dir_path.is_symlink()
            else:
                raise ValueError("Unexpected error")
    
        is_symlink = partialmethod(
            _is_symlink_or_physical, what="symlink", expected_file_list=None
        )
        is_physical = partialmethod(_is_symlink_or_physical, what="physical")
    
    
        def archive(self, remove_from_data_store=True):
            fetchopts = {
                "removeFromDataStore": remove_from_data_store,
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                "@type": "as.dto.dataset.archive.DataSetArchiveOptions",
    
    Swen Vermeul's avatar
    Swen Vermeul committed
            self.archive_unarchive("archiveDataSets", fetchopts)
            if VERBOSE:
    
                print(f"DataSet {self.permId} archived")
    
    Swen Vermeul's avatar
    Swen Vermeul committed
            fetchopts = {"@type": "as.dto.dataset.unarchive.DataSetUnarchiveOptions"}
            self.archive_unarchive("unarchiveDataSets", fetchopts)
            if VERBOSE:
    
                print(f"DataSet {self.permId} unarchived")
    
    
        def archive_unarchive(self, method, fetchopts):
            payload = {}
    
            request = {
                "method": method,
                "params": [
                    self.openbis.token,
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                    [{"permId": self.permId, "@type": "as.dto.dataset.id.DataSetPermId"}],
                    dict(fetchopts),
    
                ],
            }
            resp = self.openbis._post_request(self._openbis.as_v3, request)
            return
    
        def set_properties(self, properties):
    
            """expects a dictionary of property names and their values.
            Does not save the dataset.
            """
            for prop in properties.keys():
                setattr(self.p, prop, properties[prop])
    
        set_props = set_properties
    
        def get_dataset_files(self, start_with=None, count=None, **properties):
    
    Swen Vermeul's avatar
    Swen Vermeul committed
            search_criteria = get_type_for_entity("dataSetFile", "search")
    
            search_criteria["operator"] = "AND"
            search_criteria["criteria"] = [
                {
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                    "criteria": [
                        {
                            "fieldName": "code",
                            "fieldType": "ATTRIBUTE",
                            "fieldValue": {
                                "value": self.permId,
                                "@type": "as.dto.common.search.StringEqualToValue",
                            },
                            "@type": "as.dto.common.search.CodeSearchCriteria",
                        }
                    ],
                    "operator": "OR",
                    "@type": "as.dto.dataset.search.DataSetSearchCriteria",
    
    Swen Vermeul's avatar
    Swen Vermeul committed
            fetchopts = get_fetchoption_for_entity("dataSetFile")
    
    
            request = {
                "method": "searchFiles",
                "params": [
    
                    search_criteria,
                    fetchopts,
                ],
            }
    
            full_url = urljoin(self._get_download_url(), dss_endpoint)
            resp = self.openbis._post_request_full_url(full_url, request)
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                objects = response["objects"]
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                    "dataSetPermId",
                    "dataStore",
                    "downloadUrl",
                    "path",
                    "directory",
                    "fileLength",
                    "checksumCRC32",
                    "checksum",
                    "checksumType",
    
                ]
    
                dataSetFiles = None
                if len(objects) == 0:
                    dataSetFiles = DataFrame(columns=attrs)
                else:
                    dataSetFiles = DataFrame(objects)
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                    dataSetFiles["downloadUrl"] = dataSetFiles["dataStore"].map(
                        extract_downloadUrl
                    )
    
                    dataSetFiles["checksumCRC32"] = (
                        dataSetFiles["checksumCRC32"]
                        .fillna(0.0)
                        .astype(int)
                        .map(signed_to_unsigned)
                    )
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                    dataSetFiles["dataStore"] = dataSetFiles["dataStore"].map(extract_code)
                    dataSetFiles["dataSetPermId"] = dataSetFiles["dataSetPermId"].map(
                        extract_permid
                    )
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                openbis_obj=self.openbis,
                entity="dataSetFile",
                identifier_name="dataSetPermId",
    
                start_with=start_with,
                count=count,
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                totalCount=resp.get("totalCount"),
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                df_initializer=create_data_frame,
    
    Swen Vermeul's avatar
    Swen Vermeul committed
        def download(
    
                self,
                files=None,
                destination=None,
                create_default_folders=True,
                wait_until_finished=True,
                workers=10,
                linked_dataset_fileservice_url=None,
    
    Swen Vermeul's avatar
    Swen Vermeul committed
        ):
            """download the files of the dataSet.
    
    
            files -- a single file or a list of files. If no files are specified, all files of a given dataset are downloaded.
            destination -- if destination is specified, files are downloaded in __current_dir__/destination/permId/ If no destination is specified, the hostname is chosen instead of destination
            create_default_folders -- by default, this download method will automatically create destination/permId/original/DEFAULT. If create_default_folders is set to False, all these folders will be ommited. Use with care and by specifying the destination folder.
            workers -- Default: 10. Files are usually downloaded in parallel, using 10 workers by default.
            wait_unitl_finished -- True. If you want to immediately continue and run the download in background, set this to False.
    
            """
    
            if files == None:
                files = self.file_list
            elif isinstance(files, str):
                files = [files]
    
            if destination is None:
    
                destination = self.openbis.download_prefix
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                # destination = self.openbis.hostname
    
            kind = None
            if "kind" in self.data:  # openBIS 18.6.x DTO
                kind = self.data["kind"]
            elif ("type" in self.data) and (
    
    Swen Vermeul's avatar
    Swen Vermeul committed
            ):  # openBIS 16.5.x DTO
                kind = self.data["type"]["kind"]
    
            if kind in ["PHYSICAL", "CONTAINER"]:
    
                if self.openbis.get_server_information().is_version_greater_than(3, 5):
    
                    return self._download_fast_physical(files, destination, create_default_folders,
                                                        wait_until_finished)
                else:
                    return self._download_physical(
                        files, destination, create_default_folders, wait_until_finished, workers
                    )
    
    Swen Vermeul's avatar
    Swen Vermeul committed
            elif kind == "LINK":
    
                if linked_dataset_fileservice_url is None:
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                    raise ValueError(
                        "Can't download a LINK data set without the linked_dataset_fileservice_url parameters."
                    )
                return self._download_link(
                    files,
                    destination,
                    wait_until_finished,
                    workers,
                    linked_dataset_fileservice_url,
                    content_copy_index,
                )
    
                raise ValueError(f"Can't download data set of kind {kind}.")
    
        def _download_fast_physical(
                self, files, destination, create_default_folders, wait_until_finished
        ):
            """Download for data sets of kind PHYSICAL using fast download scheme"""
    
            if create_default_folders:
                final_destination = os.path.join(destination, self.permId)
            else:
                final_destination = destination
    
            self.__dict__["download_path"] = final_destination
    
            download_url = self._get_download_url()
    
            fast_download = FastDownload(self.openbis.token, download_url, self.permId, files,
                                         final_destination, create_default_folders, wait_until_finished,
                                         self.openbis.verify_certificates,
                                         wished_number_of_streams=4)
            return fast_download.download()
    
    
    Swen Vermeul's avatar
    Swen Vermeul committed
        def _download_physical(
    
                self, files, destination, create_default_folders, wait_until_finished, workers
    
    Swen Vermeul's avatar
    Swen Vermeul committed
        ):
            """Download for data sets of kind PHYSICAL."""
    
            final_destination = ""
            if create_default_folders:
                final_destination = os.path.join(destination, self.permId)
            else:
                final_destination = destination
    
    Swen Vermeul's avatar
    Swen Vermeul committed
            self.__dict__["download_path"] = final_destination
    
            download_url = self._get_download_url()
    
    Swen Vermeul's avatar
    Swen Vermeul committed
            base_url = download_url + "/datastore_server/" + self.permId + "/"
    
            with DataSetDownloadQueue(workers=workers) as queue:
                # get file list and start download
                for filename in files:
    
                    fi_df = self.get_dataset_files().df
                    file_size = fi_df[fi_df["path"] == filename]["fileLength"].values[0]
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                    download_url = base_url + filename + "?sessionID=" + self.openbis.token
                    download_url = quote(download_url, safe=":/?=")
    
                    filename_dest = ""
                    if create_default_folders:
                        # create original/ or original/DEFAULT subfolders
                        filename_dest = os.path.join(final_destination, filename)
                    else:
                        # ignore original/ and original/DEFAULT folders that come from openBIS
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                        if filename.startswith("original/"):
                            filename = filename.replace("original/", "", 1)
                        if filename.startswith("DEFAULT/"):
                            filename = filename.replace("DEFAULT/", "", 1)
    
                        filename_dest = os.path.join(final_destination, filename)
    
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                    queue.put(
                        [
                            download_url,
                            filename,
                            filename_dest,
                            file_size,
                            self.openbis.verify_certificates,
                            "wb",
                        ]
                    )
    
                # wait until all files have downloaded
                if wait_until_finished:
                    queue.join()
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                if VERBOSE:
    
                    print(f"Files downloaded to: {os.path.join(final_destination)}")
    
    Swen Vermeul's avatar
    Swen Vermeul committed
        def _download_link(
    
                self,
                files,
                destination,
                wait_until_finished,
                workers,
                linked_dataset_fileservice_url,
                content_copy_index,
    
    Swen Vermeul's avatar
    Swen Vermeul committed
        ):
            """Download for data sets of kind LINK.
    
            Requires the microservice server to be running at the given linked_dataset_fileservice_url.
            """
    
    
    Swen Vermeul's avatar
    Swen Vermeul committed
            with DataSetDownloadQueue(
    
                    workers=workers, collect_files_with_wrong_length=True
    
    Swen Vermeul's avatar
    Swen Vermeul committed
            ) as queue:
    
                if content_copy_index >= len(self.data["linkedData"]["contentCopies"]):
                    raise ValueError("Content Copy index out of range.")
                content_copy = self.data["linkedData"]["contentCopies"][content_copy_index]
    
                    fi_df = self.get_dataset_files().df
                    file_size = fi_df[fi_df["path"] == filename]["fileLength"].values[0]
    
                    download_url = linked_dataset_fileservice_url
                    download_url += "?sessionToken=" + self.openbis.token
                    download_url += "&datasetPermId=" + self.data["permId"]["permId"]
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                    download_url += (
    
                            "&externalDMSCode=" + content_copy["externalDms"]["code"]
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                    )
                    download_url += "&contentCopyPath=" + content_copy["path"].replace(
                        "/", "%2F"
                    )
    
                    download_url += "&datasetPathToFile=" + urllib.parse.quote(filename)
    
                    filename_dest = os.path.join(destination, self.permId, filename)
    
                    # continue download if file is not complete - do nothing if it is
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                    write_mode = "wb"
    
                    if os.path.exists(filename_dest):
                        actual_size = os.path.getsize(filename_dest)
                        if actual_size == int(file_size):
                            continue
                        elif actual_size < int(file_size):
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                            write_mode = "ab"
    
                            download_url += "&offset=" + str(actual_size)
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                    queue.put(
                        [
                            download_url,
                            filename,
                            filename_dest,
                            file_size,
                            self.openbis.verify_certificates,
                            write_mode,
                        ]
                    )
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                if VERBOSE:
                    print(
                        "Files downloaded to: %s" % os.path.join(destination, self.permId)
                    )
    
                return destination, queue.files_with_wrong_length
    
    Swen Vermeul's avatar
    Swen Vermeul committed
            return self.__dict__["folder"]
    
            """Returns the list of files including their directories as an array of strings.
            Folders are not listed.
    
                fl = self.get_dataset_files().df
                return fl[fl["directory"] == False]["path"].to_list()
    
        @property
        def file_links(self):
    
            """Returns a dictionary of absolute file links for every file in this dataSet.
            As the link also contains a session token (sessionID), sharing this link might be
            a security risk. When the token is no longer valid, the link will no longer work either.
            """
    
            if self.is_new:
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                return ""
    
            url = self.openbis.url
    
    Swen Vermeul's avatar
    Swen Vermeul committed
            location_part = self.physicalData.location.split("/")[-1]
    
            token = self.openbis.token
    
            file_links = {}
            for filepath in self.file_list:
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                quoted_filepath = urllib.parse.quote(filepath, safe="")
                file_links[filepath] = (
    
                        "/".join([url, "datastore_server", location_part, quoted_filepath])
                        + "?sessionID="
                        + token
    
            return file_links
    
        @property
        def rel_file_links(self):
            """Returns a dictionary of relative file links for every file in this dataSet. These relative file link can be embedded in a <img src="{rel_link}">
            element within a XML property. If the dataSet file happens to be a picture, in ELN-LIMS, the picture will be displayed inline.
            """
            if self.is_new:
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                return ""
    
            url = self.openbis.url
    
    Swen Vermeul's avatar
    Swen Vermeul committed
            location_part = self.physicalData.location.split("/")[-1]
    
            rel_file_links = {}
            for filepath in self.file_list:
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                quoted_filepath = urllib.parse.quote(filepath, safe="")
                rel_file_links[filepath] = "/".join(
                    ["/datastore_server", location_part, quoted_filepath]
                )
    
            return rel_file_links
    
    
    Swen Vermeul's avatar
    Swen Vermeul committed
        def get_files(self, start_folder="/"):
            """Returns a DataFrame of all files in this dataset"""
    
            if start_folder.startswith("/"):
                start_folder = start_folder[1:]
            file_list = self.get_dataset_files().df
            file_list[file_list["path"].str.startswith(start_folder)]
            new_file_list = file_list[
                ["directory", "path", "fileLength", "checksumCRC32"]
            ].rename(
                columns={
                    "directory": "isDirectory",
                    "path": "pathInDataSet",
                    "fileLength": "fileSize",
                    "checksumCRC32": "crc32Checksum",
                }
    
        def _get_download_url(self):
            download_url = ""
            if "downloadUrl" in self.data["dataStore"]:
    
                download_url = self.data["dataStore"]["downloadUrl"]
    
            else:
                # fallback, if there is no dataStore defined
                datastores = self.openbis.get_datastores()
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                download_url = datastores["downloadUrl"][0]
    
        def get_file_list(self, recursive=True, start_folder="/"):
            """Lists all files of a given dataset. You can specifiy a start_folder other than "/".
            By default, all directories and their containing files are listed recursively. You can
            turn off this option by setting recursive=False.
            """
    
            print("This method is deprecated. Consider using get_files() instead")
    
            request = {
                "method": "listFilesForDataSet",
                "params": [
                    self.openbis.token,
                    self.permId,
                    start_folder,
                    recursive,
                ],
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                "id": "1",
    
            download_url = self._get_download_url()
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                download_url + "/datastore_server/rmi-dss-api-v1.json",
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                verify=self.openbis.verify_certificates,
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                if "error" in data:
                    raise ValueError("Error from openBIS: " + data["error"]["message"])
                elif "result" in data:
                    return data["result"]
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                    raise ValueError(
                        "request to openBIS did not return either result nor error"
                    )
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                raise ValueError("internal error while performing post request")
    
    Swen Vermeul's avatar
    Swen Vermeul committed
        def _generate_plugin_request(self, dss, permId=None):
    
            """generates a request to activate the dataset-uploader ingestion plugin to
            register our files as a new dataset
            """
    
            sample_identifier = None
            if self.sample is not None:
                sample_identifier = self.sample.identifier
    
            experiment_identifier = None
            if self.experiment is not None:
                experiment_identifier = self.experiment.identifier
    
            parentIds = self.parents
    
    
            dataset_type = self.type.code
            properties = self.props.all_nonempty()
    
            request = {
                "method": "createReportFromAggregationService",
                "params": [
                    self.openbis.token,
                    dss,
                    PYBIS_PLUGIN,
                    {
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                        "permId": permId,
                        "method": "insertDataSet",
                        "sampleIdentifier": sample_identifier,
                        "experimentIdentifier": experiment_identifier,
                        "dataSetType": dataset_type,
                        "folderName": self.folder,
                        "fileNames": self.files_in_wsp,
                        "isZipDirectoryUpload": self.isZipDirectoryUpload,
                        "properties": properties,
                        "parentIdentifiers": parentIds,
                    },
    
    Swen Vermeul's avatar
    Swen Vermeul committed
        def save(self, permId=None):
    
            for prop_name, prop in self.props._property_names.items():
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                if prop["mandatory"]:
                    if (
    
                            getattr(self.props, prop_name) is None
                            or getattr(self.props, prop_name) == ""
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                    ):
                        raise ValueError(
    
                            f"Property '{prop_name}' is mandatory and must not be None"
    
                data_stores = self.openbis.get_datastores()
    
                if self.sample is None and self.experiment is None:
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                    raise ValueError(
                        "A DataSet must be either connected to a Sample or an Experiment"
                    )
    
                    if self.files is None or len(self.files) == 0:
                        raise ValueError(
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                            "Cannot register a dataset without a file. Please provide at least one file"
    
                    if self.openbis.get_server_information().is_version_greater_than(3, 5):
                        return self._upload_v3(data_stores)
    
                    return self._upload_v1(permId, data_stores)
    
                    if self.files is not None and len(self.files) > 0:
                        raise ValueError(
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                            "DataSets of kind CONTAINER or LINK cannot contain data"
    
    
                    # if no code for the container was provided, let openBIS
                    # generate the code automatically
                    if self.code is None or self.code == "":
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                        request["params"][1][0]["autoGeneratedCode"] = True
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                        request["params"][1][0]["autoGeneratedCode"] = False
    
                    props = self.p._all_props()
    
                    request["params"][1][0]["properties"] = props
                    request["params"][1][0]["dataStoreId"] = {
                        "permId": DSpermId,
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                        "@type": "as.dto.datastore.id.DataStorePermId",
    
                    }
                    resp = self.openbis._post_request(self.openbis.as_v3, request)
    
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                    if VERBOSE:
                        print("DataSet successfully created.")
                    new_dataset_data = self.openbis.get_dataset(
                        resp[0]["permId"], only_data=True
                    )
    
                    self._set_data(new_dataset_data)
                    return self
    
            else:
                request = self._up_attrs()
                props = self.p._all_props()
                request["params"][1][0]["properties"] = props
    
                self.openbis._post_request(self.openbis.as_v3, request)
    
    Swen Vermeul's avatar
    Swen Vermeul committed
                if VERBOSE:
                    print("DataSet successfully updated.")
    
        def _upload_v1(self, permId, datastores):
            # for uploading phyiscal data, we first upload it to the session workspace
            self.upload_files_v1(
                datastore_url=datastores["downloadUrl"][0],
                files=self.files,
                folder="",
                wait_until_finished=True,
            )
    
            # activate the ingestion plugin, as soon as the data is uploaded
            # this will actually register the dataset in the datastore and the AS
            request = self._generate_plugin_request(
                dss=datastores["code"][0],
                permId=permId,
            )
            resp = self.openbis._post_request(self.openbis.reg_v1, request)
            if resp["rows"][0][0]["value"] == "OK":
                permId = resp["rows"][0][2]["value"]
                if permId is None or permId == "":
                    self.__dict__["is_new"] = False
                    if VERBOSE:
                        print(
                            "DataSet successfully created. Because you connected to an openBIS version older than 16.05.04, you cannot update the object."
                        )
                else:
                    new_dataset_data = self.openbis.get_dataset(
                        permId, only_data=True
                    )
                    self._set_data(new_dataset_data)
                    if VERBOSE:
                        print("DataSet successfully created.")
                    return self
            else:
                print(json.dumps(request))
                raise ValueError(
                    "Error while creating the DataSet: "
                    + resp["rows"][0][1]["value"]
                )
    
        def _upload_v3(self, data_stores):
            upload_id = str(uuid.uuid4())
    
            datastore_url = data_stores["downloadUrl"][0]
    
            # for uploading phyiscal data, we first upload it to the session workspace
            self.upload_files_v3(
                upload_id=upload_id,
    
                files=self.files,
                folder="",
                wait_until_finished=True,
            )
    
            param = {
                "@type": "dss.dto.dataset.create.UploadedDataSetCreation",
                "@id": "1",
                "typeId": {
                    "@type": "as.dto.entitytype.id.EntityTypePermId",
                    "@id": "2",
                    "permId": self.type.code,
                    "entityKind": "DATA_SET"},
    
                "properties": self.props.all_nonempty(),
                "parentIds": [],
                "uploadId": upload_id
            }
    
            if self.experiment is not None:
                param["experimentId"] = {
                    "@type": "as.dto.experiment.id.ExperimentIdentifier",
                    "@id": "3",
                    "identifier": self.experiment.identifier
                }
            if self.sample is not None:
                param["sampleId"] = {
                    "@type": "as.dto.sample.id.SamplePermId",
                    "@id": "4",
                    "permId": self.sample.permId
                }
    
            # TODO: check if this part is needed
    
            parent_ids = self.parents
            if parent_ids is None:
                parent_ids = []
            counter = 5
            for parent_id in parent_ids:
                param["parentIds"] += {
                    "@type": "as.dto.dataset.id.DataSetPermId",
                    "@id": str(counter),
                    "permId": parent_id
                }
                counter += 1