pybis.py

            "type": { "@type": "as.dto.experiment.fetchoptions.ExperimentTypeFetchOptions" },
            "@type": "as.dto.experiment.fetchoptions.ExperimentFetchOptions" 
        }

        request = {
            "method": "searchExperiments",
            "params": [ self.token, 
                criteria,
                options,
            ],
        }
        resp = self._post_request(self.as_v3, request)
        if len(resp['objects']) == 0:
            raise ValueError("No experiments found!")

        objects = resp['objects']
        parse_jackson(objects)

        experiments = DataFrame(objects)
        experiments['registrationDate']= experiments['registrationDate'].map(format_timestamp)
        experiments['modificationDate']= experiments['modificationDate'].map(format_timestamp)
        experiments['project']= experiments['project'].map(extract_code)
        experiments['registrator'] = experiments['registrator'].map(extract_person)
        experiments['modifier'] = experiments['modifier'].map(extract_person)
        experiments['identifier'] = experiments['identifier'].map(extract_identifier)
        experiments['type'] = experiments['type'].map(extract_code)

        exps = experiments[['code', 'identifier', 'project', 'type', 'registrator', 
            'registrationDate', 'modifier', 'modificationDate']]
        return Things(self, 'experiment', exps, 'identifier')


    def get_datasets(self, 
        code=None, type=None, 
        withParents=None, withChildren=None,
        withSamples=None, withExperiments=None
    ):

        sub_criteria = []

        if code:
            sub_criteria.append(_criteria_for_code(code))
        if type:
            sub_criteria.append(_subcriteria_for_type(type, 'DataSet'))
        if withParents:
            sub_criteria.append(_subcriteria_for_permid(withParents, 'DataSet', 'Parents'))
        if withChildren:
            sub_criteria.append(_subcriteria_for_permid(withChildren, 'DataSet', 'Children'))
        if withSamples:
            sub_criteria.append(_subcriteria_for_permid(withSamples, 'Sample'))

        if withExperiments:
            sub_criteria.append(_subcriteria_for_permid(withExperiments, 'Experiment'))

        criteria = {
            "criteria": sub_criteria,
            "@type": "as.dto.dataset.search.DataSetSearchCriteria",
            "operator": "AND"
        }

        fetchopts = {
#            "parents":      { "@type": "as.dto.dataset.fetchoptions.DataSetFetchOptions" },
#            "children":     { "@type": "as.dto.dataset.fetchoptions.DataSetFetchOptions" },
            "containers":   { "@type": "as.dto.dataset.fetchoptions.DataSetFetchOptions" },
            "type":         { "@type": "as.dto.dataset.fetchoptions.DataSetTypeFetchOptions" }
        }

        for option in ['tags', 'properties', 'sample']:
            fetchopts[option] = fetch_option[option]

        request = {
            "method": "searchDataSets",
            "params": [ self.token, 
                criteria,
                fetchopts,
            ],
        }
        resp = self._post_request(self.as_v3, request)
        objects = resp['objects']
        if len(objects) == 0:
            raise ValueError("no datasets found!")
        else:
            parse_jackson(objects)
            datasets = DataFrame(objects)
            datasets['registrationDate']= datasets['registrationDate'].map(format_timestamp)
            datasets['modificationDate']= datasets['modificationDate'].map(format_timestamp)
            datasets['sample']= datasets['sample'].map(extract_nested_identifier)
            datasets['type']= datasets['type'].map(extract_code)
            ds = Things(
                self,
                'dataset',
                datasets[['code', 'properties', 'type', 'sample', 'registrationDate', 'modificationDate']]
            )
            return ds


    def get_experiment(self, expId, withAttachments=False):
        """ Returns an experiment object for a given identifier (expId).
        """

        fetchopts = {
            "@type": "as.dto.experiment.fetchoptions.ExperimentFetchOptions",
            "type": {
                "@type": "as.dto.experiment.fetchoptions.ExperimentTypeFetchOptions",
            },
        }

        search_request = search_request_for_identifier(expId, 'experiment')
        for option in ['tags', 'properties', 'attachments', 'project', 'samples']:
            fetchopts[option] = fetch_option[option]

        if withAttachments:
            fetchopts['attachments'] = fetch_option['attachmentsWithContent']

        request = {
        "method": "getExperiments",
            "params": [ 
                self.token,
                [ search_request ],
                fetchopts
            ],
        } 
        resp = self._post_request(self.as_v3, request)
        if len(resp) == 0:
            raise ValueError("No such experiment: %s" % expId)
        return Experiment(self, 
            self.get_experiment_type(resp[expId]["type"]["code"]), 
            resp[expId]
        )


    def new_experiment(self, project_ident, code, type, properties=None, attachments=None, tags=None):

        tagIds = _create_tagIds(tags)
        typeId = _create_typeId(type)
        projectId = _create_projectId(project_ident)
        if properties is None:
            properties = {}
        
        request = {
            "method": "createExperiments",
            "params": [
                self.token,
                [
                    {
                        "properties": properties,
                        "code": code,
                        "typeId" : typeId,
                        "projectId": projectId,
                        "tagIds": tagIds,
                        "attachments": attachments,
                        "@type": "as.dto.experiment.create.ExperimentCreation",
                    }
                ]
            ],
        }
        resp = self._post_request(self.as_v3, request)
        return self.get_experiment(resp[0]['permId'])


    def update_experiment(self, experimentId, properties=None, tagIds=None, attachments=None):
        params = {
            "experimentId": {
                "permId": experimentId,
                "@type": "as.dto.experiment.id.ExperimentPermId"
            },
            "@type": "as.dto.experiment.update.ExperimentUpdate"
        }
        if properties is not None:
            params["properties"]= properties
        if tagIds is not None:
            params["tagIds"] = tagIds
        if attachments is not None:
            params["attachments"] = attachments

        request = {
            "method": "updateExperiments",
            "params": [
                self.token,
                [ params ]
            ]
        }
        self._post_request(self.as_v3, request)


    def create_sample(self, space_ident, code, type, 
        project_ident=None, experiment_ident=None, properties=None, attachments=None, tags=None):

        tagIds = _create_tagIds(tags)
        typeId = _create_typeId(type)
        projectId = _create_projectId(project_ident)
        experimentId = _create_experimentId(experiment_ident)

        if properties is None:
            properties = {}
        
        request = {
            "method": "createSamples",
            "params": [
                self.token,
                [
                    {
                        "properties": properties,
                        "code": code,
                        "typeId" : typeId,
                        "projectId": projectId,
                        "experimentId": experimentId,
                        "tagIds": tagIds,
                        "attachments": attachments,
                        "@type": "as.dto.sample.create.SampleCreation",
                    }
                ]
            ],
        }
        resp = self._post_request(self.as_v3, request)
        return self.get_sample(resp[0]['permId'])


    def update_sample(self, sampleId, space=None, project=None, experiment=None,
        parents=None, children=None, components=None, properties=None, tagIds=None, attachments=None):
        params = {
            "sampleId": {
                "permId": sampleId,
                "@type": "as.dto.sample.id.SamplePermId"
            },
            "@type": "as.dto.sample.update.SampleUpdate"
        }
        if space is not None:
            params['spaceId'] = space
        if project is not None:
            params['projectId'] = project
        if properties is not None:
            params["properties"]= properties
        if tagIds is not None:
            params["tagIds"] = tagIds
        if attachments is not None:
            params["attachments"] = attachments

        request = {
            "method": "updateSamples",
            "params": [
                self.token,
                [ params ]
            ]
        }
        self._post_request(self.as_v3, request)


    def delete_entity(self, what, permid, reason):
        """Deletes Spaces, Projects, Experiments, Samples and DataSets
        """

        entity_type = "as.dto.{}.id.{}PermId".format(what.lower(), what.capitalize())
        request = {
            "method": "delete" + what.capitalize()  + 's',
            "params": [
                self.token,
                [
                    {
                        "permId": permid,
                        "@type": entity_type
                    }
                ],
                {
                    "reason": reason,
                    "@type": "as.dto.{}.delete.{}DeletionOptions".format(what.lower(), what.capitalize())
                }
            ]
        }
        self._post_request(self.as_v3, request)


    def get_deletions(self):
        request = {
            "method": "searchDeletions",
            "params": [
                self.token,
                {},
                {
                    "deletedObjects": {
                        "@type": "as.dto.deletion.fetchoptions.DeletedObjectFetchOptions"
                    }
                }
            ]
        }
        resp = self._post_request(self.as_v3, request)
        objects = resp['objects']
        parse_jackson(objects)

        new_objs = [] 
        for value in objects:
            del_objs = extract_deletion(value)
            if len(del_objs) > 0:
                new_objs.append(*del_objs)

        return DataFrame(new_objs)


    def new_project(self, space_code, code, description, leaderId):
        request = {
            "method": "createProjects",
            "params": [
                self.token,
                [
                    {
                        "code": code,
                        "spaceId": {
                            "permId": space_code,
                            "@type": "as.dto.space.id.SpacePermId"
                        },
                        "@type": "as.dto.project.create.ProjectCreation",
                        "description": description,
                        "leaderId": leaderId,
                        "attachments": None
                    }
                ]
            ],
        }
        resp = self._post_request(self.as_v3, request)
        return resp


    def get_project(self, projectId):
        request = self._create_get_request('getProjects', 'project', projectId, ['attachments'])
        resp = self._post_request(self.as_v3, request)
        return resp


    def get_projects(self, space=None):
        """ Get a list of all available projects (DataFrame object).
        """

        if space is None:
            space = self.default_space

        sub_criteria = []
        if space:
            sub_criteria.append(_subcriteria_for_code(space, 'space'))

        criteria = {
            "criteria": sub_criteria,
            "@type": "as.dto.project.search.ProjectSearchCriteria",
            "operator": "AND"
        }

        options = {
            "registrator": { "@type": "as.dto.person.fetchoptions.PersonFetchOptions" },
            "modifier": { "@type": "as.dto.person.fetchoptions.PersonFetchOptions" },
            "experiments": { "@type": "as.dto.experiment.fetchoptions.ExperimentFetchOptions", },
            "space": { "@type": "as.dto.space.fetchoptions.SpaceFetchOptions" },
            "@type": "as.dto.project.fetchoptions.ProjectFetchOptions"
        }

        request = {
            "method": "searchProjects",
            "params": [ self.token, 
                criteria,
                options,
            ],
        }

        resp = self._post_request(self.as_v3, request)
        if resp is not None:
            objects = resp['objects']
            parse_jackson(objects)

            projects = DataFrame(objects)
            if len(projects) is 0:
                raise ValueError("No projects found!")

            projects['registrationDate']= projects['registrationDate'].map(format_timestamp)
            projects['modificationDate']= projects['modificationDate'].map(format_timestamp)
            projects['registrator'] = projects['registrator'].map(extract_person)
            projects['modifier'] = projects['modifier'].map(extract_person)
            projects['permid'] = projects['permId'].map(extract_permid)
            projects['identifier'] = projects['identifier'].map(extract_identifier)
            projects['space'] = projects['space'].map(extract_code)

            pros=projects[['code', 'space', 'registrator', 'registrationDate', 
                            'modifier', 'modificationDate', 'permid', 'identifier']]
            return Things(self, 'project', pros, 'identifier')
        else:
            raise ValueError("No projects found!")


    def _create_get_request(self, method_name, entity_type, permids, options):

        if not isinstance(permids, list):
            permids = [permids]

        type = "as.dto.{}.id.{}".format(entity_type.lower(), entity_type.capitalize())
        search_params = []
        for permid in permids:
            # decide if we got a permId or an identifier
            match = re.match('/', permid)
            if match:
                search_params.append(
                    { "identifier" : permid, "@type" : type + 'Identifier' }
                )
            else: 
                search_params.append(
                    { "permId" : permid, "@type": type + 'PermId' }
                )

        fo = {}
        for option in options:
            fo[option] = fetch_option[option]

        request = {
            "method": method_name,
            "params": [
                self.token,
                search_params,
                fo
            ],
        }
        return request


    def get_terms(self, vocabulary=None):
        """ Returns information about vocabulary, including its controlled vocabulary
        """

        search_request = {}
        if vocabulary is not None:
            search_request = _gen_search_request( { 
                "vocabulary": "VocabularyTerm", 
                "criteria" : [{
                    "vocabulary": "Vocabulary",
                    "code": vocabulary
                }]
            })
    
        fetch_options = {
            "vocabulary" : { "@type" : "as.dto.vocabulary.fetchoptions.VocabularyFetchOptions" },
            "@type": "as.dto.vocabulary.fetchoptions.VocabularyTermFetchOptions"
        }

        request = {
            "method": "searchVocabularyTerms",
            "params": [ self.token, search_request, fetch_options ]
        }
        resp = self._post_request(self.as_v3, request)
        parse_jackson(resp)
        return Vocabulary(resp)

    def get_tags(self):
        """ Returns a DataFrame of all 
        """
        request = {
            "method": "searchTags",
            "params": [ self.token, {}, {} ]
        }
        resp = self._post_request(self.as_v3, request)
        parse_jackson(resp)
        objects = DataFrame(resp['objects'])
        objects['registrationDate'] = objects['registrationDate'].map(format_timestamp)
        return objects[['code', 'registrationDate']]


    def get_sample_types(self, type=None):
        """ Returns a list of all available sample types
        """
        return self._get_types_of(
            "searchSampleTypes",
            "Sample",
            type, 
            ["generatedCodePrefix"]
        )

    def get_sample_type(self, type):
        try:
            return self._get_types_of(
                "searchSampleTypes", 
                "Sample",
                type,
                ["generatedCodePrefix"]
            )
        except Exception:
            raise ValueError("no such sample type: {}".format(type))


    def get_experiment_types(self, type=None):
        """ Returns a list of all available experiment types
        """
        return self._get_types_of(
            "searchExperimentTypes", 
            "Experiment", 
            type
        )

    def get_experiment_type(self, type):
        try:    
            return self._get_types_of(
                "searchExperimentTypes", 
                "Experiment", 
                type
            )
        except Exception:
            raise ValueError("No such experiment type: {}".format(type))


    def get_material_types(self, type=None):
        """ Returns a list of all available material types
        """
        return self._get_types_of("searchMaterialTypes", "Material", type)

    def get_material_type(self, type):
        try:
            return self._get_types_of("searchMaterialTypes", "Material", type)
        except Exception:
            raise ValueError("No such material type: {}".format(type))


    def get_dataset_types(self, type=None):
        """ Returns a list (DataFrame object) of all currently available dataset types
        """
        return self._get_types_of("searchDataSetTypes", "DataSet", type, ['kind'])

    def get_dataset_type(self, type):
        try:
            return self._get_types_of("searchDataSetTypes", "DataSet", type, ['kind'])
        except Exception:
            raise ValueError("No such dataSet type: {}".format(type))


    def _get_types_of(self, method_name, entity_type, type=None, additional_attributes=[]):
        """ Returns a list of all available experiment types
        """

        attributes = ['code', 'description', *additional_attributes, 'modificationDate']

        search_request = {}
        fetch_options = {}

        if type is not None:
            search_request = _gen_search_request({
                entity_type.lower(): entity_type + "Type",
                "operator": "AND",
                "code": type
            })

            fetch_options = {
                "@type": "as.dto.{}.fetchoptions.{}TypeFetchOptions".format(
                    entity_type.lower(), entity_type
                )
            }
            fetch_options['propertyAssignments'] = fetch_option['propertyAssignments']
            attributes.append('propertyAssignments')
        
        request = {
            "method": method_name,
            "params": [ self.token, search_request, fetch_options ],
        }
        resp = self._post_request(self.as_v3, request)
        parse_jackson(resp)

        if type is not None and len(resp['objects']) == 1:
            return PropertyAssignments(self, resp['objects'][0])
        if len(resp['objects']) >= 1:
            types = DataFrame(resp['objects'])
            types['modificationDate'] = types['modificationDate'].map(format_timestamp)
            return Things(self, entity_type.lower()+'_type', types[attributes])
            return types[attributes]
            
        else:
            raise ValueError("Nothing found!")


    def is_session_active(self):
        """ checks whether a session is still active. Returns true or false.
        """
        return self.is_token_valid(self.token)


    def is_token_valid(self, token=None):
        """Check if the connection to openBIS is valid.
        This method is useful to check if a token is still valid or if it has timed out,
        requiring the user to login again.
        :return: Return True if the token is valid, False if it is not valid.
        """
        if token is None:
            token = self.token
        
        if token is None:
            return False

        request = {
            "method": "isSessionActive",
            "params": [ token ],
        }
        resp = self._post_request(self.as_v1, request)
        return resp


    def get_dataset(self, permid):
        """fetch a dataset and some metadata attached to it:
        - properties
        - sample
        - parents
        - children
        - containers
        - dataStore
        - physicalData
        - linkedData
        :return: a DataSet object
        """

        criteria = [{
            "permId": permid,
            "@type": "as.dto.dataset.id.DataSetPermId"
        }]

        fetchopts = {
            "parents":      { "@type": "as.dto.dataset.fetchoptions.DataSetFetchOptions" },
            "children":     { "@type": "as.dto.dataset.fetchoptions.DataSetFetchOptions" },
            "containers":   { "@type": "as.dto.dataset.fetchoptions.DataSetFetchOptions" },
            "@type":        "as.dto.dataset.fetchoptions.DataSetFetchOptions",
        }

        for option in ['tags', 'properties', 'dataStore', 'physicalData', 'linkedData', 
                       'experiment', 'sample']:
            fetchopts[option] = fetch_option[option]

        request = {
            "method": "getDataSets",
            "params": [ self.token, 
                criteria,
                fetchopts,
            ],
        }

        resp = self._post_request(self.as_v3, request)
        if resp is not None:
            for permid in resp:
                return DataSet(self, resp[permid])


    def get_sample(self, sample_ident, only_data=False, withAttachments=False):
        """Retrieve metadata for the sample.
        Get metadata for the sample and any directly connected parents of the sample to allow access
        to the same information visible in the ELN UI. The metadata will be on the file system.
        :param sample_identifiers: A list of sample identifiers to retrieve.
        """

        fetchopts = { "type": { "@type": "as.dto.sample.fetchoptions.SampleTypeFetchOptions" } }

        search_request = search_request_for_identifier(sample_ident, 'sample')

        for option in ['tags', 'properties', 'attachments', 'space', 'experiment', 'registrator', 'dataSets']:
            fetchopts[option] = fetch_option[option]

        if withAttachments:
            fetchopts['attachments'] = fetch_option['attachmentsWithContent']

        fetchopts["parents"]  = { "@type": "as.dto.sample.fetchoptions.SampleFetchOptions" }
        fetchopts["children"] = { "@type": "as.dto.sample.fetchoptions.SampleFetchOptions" }

        if withAttachments:
            fetchopts['attachments'] = fetch_option['attachmentsWithContent']

        sample_request = {
            "method": "getSamples",
            "params": [
                self.token,
                [ search_request ],
                fetchopts
            ],
        }

        resp = self._post_request(self.as_v3, sample_request)
        parse_jackson(resp)

        if resp is None or len(resp) == 0:
            raise ValueError('no such sample found: '+sample_ident)
        else:
            for sample_ident in resp:
                if only_data:
                    return resp[sample_ident]
                else:
                    return Sample(self, self.get_sample_type(resp[sample_ident]["type"]["code"]), resp[sample_ident])


    def new_space(self, name, description=None):
        """ Creates a new space in the openBIS instance. Returns a list of all spaces
        """
        request = {
            "method": "createSpaces",
            "params": [
                self.token,
                [ {
                    "@id": 0,
                    "code": name,
                    "description": description,
                    "@type": "as.dto.space.create.SpaceCreation"
                } ]
            ],
        }
        resp = self._post_request(self.as_v3, request)
        return self.get_spaces(refresh=True)


    def new_analysis(self, name, description=None, sample=None, dss_code=None, result_files=None,
    notebook_files=None, parents=None):

        """ An analysis contains the Jupyter notebook file(s) and some result files.
            Technically this method involves uploading files to the session workspace
            and activating the dropbox aka dataset ingestion service "jupyter-uploader-api"
        """

        if dss_code is None:
            dss_code = self.get_datastores()['code'][0]

        # if a sample identifier was given, use it as a string.
        # if a sample object was given, take its identifier
        sampleId = None
        if isinstance(sample, str):
            if (is_identifier(sample)):
                sampleId = { 
                    "identifier": sample,
                    "@type": "as.dto.sample.id.SampleIdentifier"
                }
            else:
                sampleId = { 
                    "permId": sample,
                    "@type": "as.dto.sample.id.SamplePermId"
                }
        else:
            sampleId = { 
                "identifier": sample.identifier,
                "@type": "as.dto.sample.id.SampleIdentifier"
            }

        parentIds = []
        if parents is not None:
            if not isinstance(parents, list):
                parants = [parents]
            for parent in parents:
                parentIds.append(parent.permId)
        
        datastore_url = self._get_dss_url(dss_code)
        folder = time.strftime('%Y-%m-%d_%H-%M-%S')

        # upload the files
        data_sets = []
        if notebook_files is not None:
            notebooks_folder = os.path.join(folder, 'notebook_files')
            self.upload_files(
                datastore_url = datastore_url,
                files=notebook_files,
                folder= notebooks_folder, 
                wait_until_finished=True
            )
            data_sets.append({
                "dataSetType" : "JUPYTER_NOTEBOOk",
                "sessionWorkspaceFolder": notebooks_folder,
                "fileNames" : notebook_files,
                "properties" : {}
            })
        if result_files is not None:
            results_folder = os.path.join(folder, 'result_files')
            self.upload_files(
                datastore_url = datastore_url,
                files=result_files,
                folder=results_folder,
                wait_until_finished=True
            )
            data_sets.append({
                "dataSetType" : "JUPYTER_RESULT",
                "sessionWorkspaceFolder" : results_folder,
                "fileNames" : result_files,
                "properties" : {}
            })

        # register the files in openBIS
        request = {
          "method": "createReportFromAggregationService",
          "params": [
            self.token,
            dss_code,
            DROPBOX_PLUGIN,
            { 
            	"sample" : {
                    "identifier" : sample.identifier
                },
                "sampleId": sampleId,
                "parentIds": parentIds,
                "containers" : [ 
                    {
                    	"dataSetType" : "JUPYTER_CONTAINER",
                    	"properties" : {
                			"NAME" : name,
                			"DESCRIPTION" : description
                    	}
                    }
                ],
                "dataSets" : data_sets,
            }
          ],
        }
        
        resp = self._post_request(self.reg_v1, request)
        try:
            if resp['rows'][0][0]['value'] == 'OK':
                return resp['rows'][0][1]['value']
        except:
            return resp


    def new_sample(self, type, **kwargs):
        """ Creates a new sample of a given sample type.
        """
        return Sample(self, self.get_sample_type(type), None, **kwargs)


    def _get_dss_url(self, dss_code=None):
        """ internal method to get the downloadURL of a datastore.
        """
        dss = self.get_datastores()
        if dss_code is None:
            return dss['downloadUrl'][0]
        else:
            return dss[dss['code'] == dss_code]['downloadUrl'][0]
        

    def upload_files(self, datastore_url=None, files=None, folder=None, wait_until_finished=False):

        if datastore_url is None:
            datastore_url = self._get_dss_url()

        if files is None:
            raise ValueError("Please provide a filename.")

        if folder is None:
            # create a unique foldername
            folder = time.strftime('%Y-%m-%d_%H-%M-%S')

        if isinstance(files, str):
            files = [files]

        self.files = files
        self.startByte = 0
        self.endByte   = 0
    
        # define a queue to handle the upload threads
        queue = DataSetUploadQueue()

        real_files = []
        for filename in files:
            if os.path.isdir(filename):
                real_files.extend([os.path.join(dp, f) for dp, dn, fn in os.walk(os.path.expanduser(filename)) for f in fn])
            else:
                real_files.append(os.path.join(filename))

        # compose the upload-URL and put URL and filename in the upload queue 
        for filename in real_files:
            file_in_wsp = os.path.join(folder, filename)
            self.files_in_wsp.append(file_in_wsp)
            upload_url = (
                datastore_url + '/session_workspace_file_upload'
                + '?filename=' + os.path.join(folder,filename)
                + '&id=1'
                + '&startByte=0&endByte=0'
                + '&sessionID=' + self.token
            )
            queue.put([upload_url, filename, self.verify_certificates])

        # wait until all files have uploaded
        if wait_until_finished:
            queue.join()

        # return files with full path in session workspace
        return self.files_in_wsp


class DataSetUploadQueue:
   
    def __init__(self, workers=20):
        # maximum files to be uploaded at once
        self.upload_queue = Queue()

        # define number of threads and start them
        for t in range(workers):
            t = Thread(target=self.upload_file)
            t.daemon = True
            t.start()


    def put(self, things):
        """ expects a list [url, filename] which is put into the upload queue
        """
        self.upload_queue.put(things)


    def join(self):
        """ needs to be called if you want to wait for all uploads to be finished
        """
        self.upload_queue.join()


    def upload_file(self):
        while True:
            # get the next item in the queue
            upload_url, filename, verify_certificates = self.upload_queue.get()

            filesize = os.path.getsize(filename)

            # upload the file to our DSS session workspace
            with open(filename, 'rb') as f:
                resp = requests.post(upload_url, data=f, verify=verify_certificates)
                resp.raise_for_status()
                data = resp.json()
                assert filesize == int(data['size'])

            # Tell the queue that we are done
            self.upload_queue.task_done()


class DataSetDownloadQueue:
    
    def __init__(self, workers=20):
        # maximum files to be downloaded at once
        self.download_queue = Queue()

        # define number of threads
        for t in range(workers):
            t = Thread(target=self.download_file)
            t.daemon = True
            t.start()


    def put(self, things):
        """ expects a list [url, filename] which is put into the download queue
        """
        self.download_queue.put(things)


    def join(self):
        """ needs to be called if you want to wait for all downloads to be finished
        """
        self.download_queue.join()


    def download_file(self):
        while True:
            url, filename, file_size, verify_certificates = self.download_queue.get()
            # create the necessary directory structure if they don't exist yet
            os.makedirs(os.path.dirname(filename), exist_ok=True)

            # request the file in streaming mode
            r = requests.get(url, stream=True, verify=verify_certificates)
            with open(filename, 'wb') as f:
                for chunk in r.iter_content(chunk_size=1024): 
                    if chunk: # filter out keep-alive new chunks
                        f.write(chunk)

            assert os.path.getsize(filename) == int(file_size)
            self.download_queue.task_done()


class DataSet():
    """ DataSet are openBIS objects that contain the actual files.
    """

    def __init__(self, openbis_obj, data):
        self.data = data
        self.permid = data["code"]
        self.permId = data["code"]
        self.openbis = openbis_obj
        if data['physicalData'] is None:
            self.shareId = None
            self.location = None
        else:
            self.shareId = data['physicalData']['shareId']
            self.location = data['physicalData']['location']

    def _repr_html_(self):
        html = """
<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th>attribute</th>
      <th>value</th>
    </tr>
  </thead>
  <tbody>
    <tr> <th>permId</th> <td>{}</td> </tr>
    <tr> <th>properties</th> <td>{}</td> </tr>
    <tr> <th>tags</th> <td>{}</td> </tr>
  </tbody>
</table>
        """
        return html.format(self.permid, self.data['properties'], self.data['tags'])

    def download(self, files=None, wait_until_finished=True, workers=10):
        """ download the actual files and put them by default in the following folder:
        __current_dir__/hostname/dataset_permid/
        If no files are specified, all files of a given dataset are downloaded.
        Files are usually downloaded in parallel, using 10 workers by default. If you want to wait until