pybis.py

            
        parse_jackson(objects)

        projects = DataFrame(objects)
        if len(projects) is 0:
            raise ValueError("No projects found!")

        projects['registrationDate'] = projects['registrationDate'].map(format_timestamp)
        projects['modificationDate'] = projects['modificationDate'].map(format_timestamp)
        projects['leader'] = projects['leader'].map(extract_person)
        projects['registrator'] = projects['registrator'].map(extract_person)
        projects['modifier'] = projects['modifier'].map(extract_person)
        projects['permId'] = projects['permId'].map(extract_permid)
        projects['identifier'] = projects['identifier'].map(extract_identifier)

        pros = projects[['identifier', 'permId', 'leader', 'registrator', 'registrationDate',
                            'modifier', 'modificationDate']]
        return Things(self, 'project', pros, 'identifier')

    def _create_get_request(self, method_name, entity, permids, options):
        """Assemble the request dict for a V3 ``get...`` call.

        :param method_name: name of the remote method, stored under "method".
        :param entity: entity name used to build the ``as.dto.<entity>.id.``
            type prefix (lower-cased package, capitalized class name).
        :param permids: a single id or a list of ids. An id starting with
            '/' is sent as an identifier, anything else as a permId.
        :param options: keys looked up in the module-level ``fetch_option``
            mapping to build the fetch options.
        :return: dict with "method" and "params" (token, ids, fetch options).
        """
        id_list = permids if isinstance(permids, list) else [permids]
        type_prefix = "as.dto.{}.id.{}".format(entity.lower(), entity.capitalize())

        def as_search_param(one_id):
            # a leading slash marks an identifier, everything else is a permId
            if re.match('/', one_id):
                return {"identifier": one_id, "@type": type_prefix + 'Identifier'}
            return {"permId": one_id, "@type": type_prefix + 'PermId'}

        search_params = [as_search_param(one_id) for one_id in id_list]
        fetch_opts = {name: fetch_option[name] for name in options}

        return {
            "method": method_name,
            "params": [
                self.token,
                search_params,
                fetch_opts
            ],
        }

    def get_terms(self, vocabulary=None):
        """ Searches vocabulary terms via ``searchVocabularyTerms``.

        :param vocabulary: code of the vocabulary to restrict the search to;
            when None an empty search request is sent.
        :return: a Vocabulary object built from the server response.
        """
        if vocabulary is None:
            criteria = {}
        else:
            criteria = _gen_search_criteria({
                "vocabulary": "VocabularyTerm",
                "criteria": [{
                    "vocabulary": "Vocabulary",
                    "code": vocabulary
                }]
            })

        term_fetch_options = {
            "vocabulary": {"@type": "as.dto.vocabulary.fetchoptions.VocabularyFetchOptions"},
            "@type": "as.dto.vocabulary.fetchoptions.VocabularyTermFetchOptions"
        }

        payload = {
            "method": "searchVocabularyTerms",
            "params": [self.token, criteria, term_fetch_options]
        }
        response = self._post_request(self.as_v3, payload)
        parse_jackson(response)
        return Vocabulary(response)

    def get_tags(self):
        """ Returns a DataFrame of all tags returned by ``searchTags``,
        reduced to the columns ``code`` and ``registrationDate`` (the latter
        passed through ``format_timestamp``).
        """
        payload = {
            "method": "searchTags",
            "params": [self.token, {}, {}]
        }
        response = self._post_request(self.as_v3, payload)
        parse_jackson(response)

        tags = DataFrame(response['objects'])
        formatted_dates = tags['registrationDate'].map(format_timestamp)
        tags['registrationDate'] = formatted_dates
        return tags[['code', 'registrationDate']]
    
    def _search_semantic_annotations(self, criteria):
        """Run ``searchSemanticAnnotations`` and flatten the returned objects.

        Each returned object is modified in place: ``permId`` is replaced by
        its inner ``permId`` value, ``entityType`` / ``propertyType`` are
        replaced by their ``code`` (taken from ``propertyAssignment`` when
        neither is set directly), and ``creationDate`` is passed through
        ``format_timestamp``.

        :param criteria: search criteria dict sent as second request parameter.
        :return: the list of flattened annotation dicts.
        :raises ValueError: if the server sends no response or no objects.
        """

        fetch_options = {
            "@type": "as.dto.semanticannotation.fetchoptions.SemanticAnnotationFetchOptions",
            "entityType": {"@type": "as.dto.entitytype.fetchoptions.EntityTypeFetchOptions"},
            "propertyType": {"@type": "as.dto.property.fetchoptions.PropertyTypeFetchOptions"},
            "propertyAssignment": {
                "@type": "as.dto.property.fetchoptions.PropertyAssignmentFetchOptions",
                "entityType" : {
                    "@type" : "as.dto.entitytype.fetchoptions.EntityTypeFetchOptions"
                },
                "propertyType" : {
                    "@type" : "as.dto.property.fetchoptions.PropertyTypeFetchOptions"
                }
            }
        }

        request = {
            "method": "searchSemanticAnnotations",
            "params": [self.token, criteria, fetch_options]
        }

        resp = self._post_request(self.as_v3, request)
        if resp is None:
            raise ValueError("No semantic annotations found!")

        annotations = resp['objects']

        # Compare by value. The previous `len(...) is 0` was an identity test
        # against an int literal, which only works by accident of CPython's
        # small-integer cache and triggers a SyntaxWarning on Python 3.8+.
        if len(annotations) == 0:
            raise ValueError("No semantic annotations found!")

        parse_jackson(annotations)

        for annotation in annotations:
            annotation['permId'] = annotation['permId']['permId']
            if annotation.get('entityType') is not None:
                annotation['entityType'] = annotation['entityType']['code']
            elif annotation.get('propertyType') is not None:
                annotation['propertyType'] = annotation['propertyType']['code']
            elif annotation.get('propertyAssignment') is not None:
                assignment = annotation['propertyAssignment']
                annotation['entityType'] = assignment['entityType']['code']
                annotation['propertyType'] = assignment['propertyType']['code']
            annotation['creationDate'] = format_timestamp(annotation['creationDate'])

        return annotations

    def get_semantic_annotations(self):
        """ Searches all semantic annotations (empty criteria) and returns
        them as a Things object keyed by ``permId``.
        """
        found = self._search_semantic_annotations({})
        columns = [
            'permId', 'entityType', 'propertyType',
            'predicateOntologyId', 'predicateOntologyVersion', 'predicateAccessionId',
            'descriptorOntologyId', 'descriptorOntologyVersion', 'descriptorAccessionId',
            'creationDate'
        ]
        table = DataFrame(found)
        return Things(self, 'semantic_annotation', table[columns], 'permId')
    
    def get_semantic_annotation(self, permId, only_data = False):
        """ Searches semantic annotations by permId and returns the first hit.

        :param permId: value matched with a StringEqualToValue permId criterion.
        :param only_data: if true, return the flattened dict of the first hit
            instead of wrapping it in a SemanticAnnotation object.
        """
        perm_id_criterion = {
            "@type" : "as.dto.common.search.PermIdSearchCriteria",
            "fieldValue" : {
                "@type" : "as.dto.common.search.StringEqualToValue",
                "value" : permId
            }
        }
        search = {
            "@type" : "as.dto.semanticannotation.search.SemanticAnnotationSearchCriteria",
            "criteria" : [perm_id_criterion]
        }

        first_hit = self._search_semantic_annotations(search)[0]

        if not only_data:
            return SemanticAnnotation(self, isNew=False, **first_hit)
        return first_hit
    
    def get_sample_types(self, type=None):
        """ Looks up sample types through ``_get_types_of`` using the
        ``searchSampleTypes`` method, with ``generatedCodePrefix`` as an
        additional attribute. ``type`` is passed on as the type code filter.
        """
        extra_columns = ["generatedCodePrefix"]
        return self._get_types_of("searchSampleTypes", "Sample", type, extra_columns)

    get_object_types = get_sample_types # Alias

    def get_sample_type(self, type):
        """ Looks up one sample type by its code.

        :raises ValueError: whenever the underlying lookup raises any Exception.
        """
        try:
            sample_type = self._get_types_of(
                "searchSampleTypes", "Sample", type, ["generatedCodePrefix"]
            )
        except Exception:
            raise ValueError("no such sample type: {}".format(type))
        return sample_type

    get_object_type = get_sample_type # Alias

    def get_experiment_types(self, type=None):
        """ Looks up experiment types through ``_get_types_of`` using the
        ``searchExperimentTypes`` method. ``type`` is passed on as the type
        code filter.
        """
        method, entity = "searchExperimentTypes", "Experiment"
        return self._get_types_of(method, entity, type)

    def get_experiment_type(self, type):
        """ Looks up one experiment type by its code.

        :raises ValueError: whenever the underlying lookup raises any Exception.
        """
        try:
            experiment_type = self._get_types_of("searchExperimentTypes", "Experiment", type)
        except Exception:
            raise ValueError("No such experiment type: {}".format(type))
        return experiment_type

    def get_material_types(self, type=None):
        """ Looks up material types through ``_get_types_of`` using the
        ``searchMaterialTypes`` method. ``type`` is passed on as the type
        code filter.
        """
        method, entity = "searchMaterialTypes", "Material"
        return self._get_types_of(method, entity, type)

    def get_material_type(self, type):
        """ Looks up one material type by its code.

        :raises ValueError: whenever the underlying lookup raises any Exception.
        """
        try:
            material_type = self._get_types_of("searchMaterialTypes", "Material", type)
        except Exception:
            raise ValueError("No such material type: {}".format(type))
        return material_type

    def get_dataset_types(self, type=None):
        """ Looks up dataset types through ``_get_types_of`` using the
        ``searchDataSetTypes`` method, with ``kind`` as an optional
        attribute. ``type`` is passed on as the type code filter.
        """
        optional_columns = ['kind']
        return self._get_types_of(
            "searchDataSetTypes",
            "DataSet",
            type,
            optional_attributes=optional_columns
        )

    def get_dataset_type(self, type):
        """ Looks up one dataset type by its code.

        :raises ValueError: whenever the underlying lookup raises any Exception.
        """
        try:
            dataset_type = self._get_types_of(
                "searchDataSetTypes",
                "DataSet",
                type,
                optional_attributes=['kind']
            )
        except Exception:
            raise ValueError("No such dataSet type: {}".format(type))
        return dataset_type

    def _get_types_of(self, method_name, entity, type_name=None, additional_attributes=None, optional_attributes=None):
        """ Returns a list of all available types of an entity.
        If the name of the entity-type is given and exactly one type matches,
        it returns a PropertyAssignments object instead.

        :param method_name: remote search method, e.g. "searchSampleTypes".
        :param entity: entity name, e.g. "Sample"; used to build the search
            criteria and the fetch option type.
        :param type_name: optional type code to search for; when given, the
            property assignments are fetched as well.
        :param additional_attributes: extra columns that are always included
            in the result (defaults to none).
        :param optional_attributes: extra columns that are included only if
            present in the response (defaults to none).
        :return: a PropertyAssignments object (single match for type_name) or
            a Things object wrapping a DataFrame of types.
        :raises ValueError: if the response contains no objects.
        """
        # None stands in for "no extra columns"; list literals as defaults
        # would be shared between all calls of this method.
        if additional_attributes is None:
            additional_attributes = []
        if optional_attributes is None:
            optional_attributes = []

        search_request = {}
        fetch_options = {}

        if type_name is not None:
            search_request = _gen_search_criteria({
                entity.lower(): entity + "Type",
                "operator": "AND",
                "code": type_name
            })

            fetch_options = {
                "@type": "as.dto.{}.fetchoptions.{}TypeFetchOptions".format(
                    entity.lower(), entity
                )
            }
            fetch_options['propertyAssignments'] = fetch_option['propertyAssignments']

        request = {
            "method": method_name,
            "params": [self.token, search_request, fetch_options],
        }
        resp = self._post_request(self.as_v3, request)
        parse_jackson(resp)

        objects = resp['objects']
        if type_name is not None and len(objects) == 1:
            return PropertyAssignments(self, objects[0])
        if len(objects) == 0:
            raise ValueError("Nothing found!")

        types = DataFrame(objects)
        types['modificationDate'] = types['modificationDate'].map(format_timestamp)
        attributes = self._get_attributes(type_name, types, additional_attributes, optional_attributes)
        return Things(self, entity.lower() + '_type', types[attributes])

    def _get_attributes(self, type_name, types, additional_attributes, optional_attributes):
        attributes = ['code', 'description'] + additional_attributes
        attributes += [attribute for attribute in optional_attributes if attribute in types]
        attributes += ['modificationDate']
        if type_name is not None:
            attributes += ['propertyAssignments']
        return attributes

    def is_session_active(self):
        """ Checks the token of this connection with ``is_token_valid`` and
        returns that result.
        """
        current_token = self.token
        return self.is_token_valid(current_token)

    def is_token_valid(self, token=None):
        """Ask the server whether a session token is still active.

        :param token: token to check; falls back to the token of this
            connection when None.
        :return: False when no token is available at all, otherwise the
            response of the ``isSessionActive`` call.
        """
        candidate = self.token if token is None else token
        if candidate is None:
            return False

        payload = {
            "method": "isSessionActive",
            "params": [candidate],
        }
        return self._post_request(self.as_v1, payload)

    def get_dataset(self, permid, only_data=False):
        """Fetch one dataset by permId together with attached metadata.

        The request asks for parents, children, containers and type, plus
        the fetch options for tags, properties, dataStore, physicalData,
        linkedData, experiment and sample.

        :param permid: permId of the dataset.
        :param only_data: if true, return the raw response entry instead of
            wrapping it in a DataSet object.
        :return: a DataSet object, or the raw entry when only_data is set.
        :raises ValueError: if the response is None or empty.
        """
        fetchopts = {
            relation: {"@type": "as.dto.dataset.fetchoptions.DataSetFetchOptions"}
            for relation in ("parents", "children", "containers")
        }
        fetchopts["type"] = {"@type": "as.dto.dataset.fetchoptions.DataSetTypeFetchOptions"}
        for name in ('tags', 'properties', 'dataStore', 'physicalData', 'linkedData',
                     'experiment', 'sample'):
            fetchopts[name] = fetch_option[name]

        dataset_ids = [{
            "permId": permid,
            "@type": "as.dto.dataset.id.DataSetPermId"
        }]
        payload = {
            "method": "getDataSets",
            "params": [
                self.token,
                dataset_ids,
                fetchopts,
            ],
        }

        resp = self._post_request(self.as_v3, payload)
        if resp is None or len(resp) == 0:
            raise ValueError('no such dataset found: ' + permid)

        parse_jackson(resp)

        # only the first entry of the response is used
        entry = resp[next(iter(resp))]
        if only_data:
            return entry
        return DataSet(
            self,
            type=self.get_dataset_type(entry["type"]["code"]),
            data=entry
        )

    def get_sample(self, sample_ident, only_data=False, withAttachments=False):
        """Retrieve one sample together with attached metadata.

        The request asks for type, tags, properties, attachments, space,
        experiment, registrator, dataSets, parents, children, container and
        components.

        :param sample_ident: value handed to ``search_request_for_identifier``
            to build the id used in the ``getSamples`` call.
        :param only_data: if true, return the raw response entry instead of
            wrapping it in a Sample object.
        :param withAttachments: if true, the 'attachments' fetch option is
            replaced by the 'attachmentsWithContent' one.
        :raises ValueError: if the response is None or empty.
        """
        sample_id = search_request_for_identifier(sample_ident, 'sample')

        fetchopts = {"type": {"@type": "as.dto.sample.fetchoptions.SampleTypeFetchOptions"}}
        fetchopts.update(
            (name, fetch_option[name])
            for name in ('tags', 'properties', 'attachments', 'space', 'experiment', 'registrator', 'dataSets')
        )
        if withAttachments:
            fetchopts['attachments'] = fetch_option['attachmentsWithContent']
        fetchopts.update(
            (relation, {"@type": "as.dto.sample.fetchoptions.SampleFetchOptions"})
            for relation in ('parents', 'children', 'container', 'components')
        )

        payload = {
            "method": "getSamples",
            "params": [
                self.token,
                [sample_id],
                fetchopts
            ],
        }

        resp = self._post_request(self.as_v3, payload)
        parse_jackson(resp)

        if resp is None or len(resp) == 0:
            raise ValueError('no such sample found: ' + sample_ident)

        # only the first entry of the response is used
        entry = resp[next(iter(resp))]
        if only_data:
            return entry
        return Sample(self, self.get_sample_type(entry["type"]["code"]), entry)

    get_object = get_sample # Alias

    def get_external_data_management_system(self, permId, only_data=False):
        """Retrieve metadata for the external data management system.

        :param permId: A permId for an external DMS.
        :param only_data: Return the raw response entry, not an ExternalDMS object.
        :raises ValueError: if the response is None or empty.
        """
        dms_ids = [{
            "@type": "as.dto.externaldms.id.ExternalDmsPermId",
            "permId": permId
        }]
        payload = {
            "method": "getExternalDataManagementSystems",
            "params": [
                self.token,
                dms_ids,
                {},
            ],
        }

        resp = self._post_request(self.as_v3, payload)
        parse_jackson(resp)

        if resp is None or len(resp) == 0:
            raise ValueError('no such external DMS found: ' + permId)

        # only the first entry of the response is used
        entry = resp[next(iter(resp))]
        if only_data:
            return entry
        return ExternalDMS(self, entry)

    def new_space(self, **kwargs):
        """ Builds a Space object bound to this connection, without existing
        data; all keyword arguments are handed to the Space constructor.
        """
        space = Space(self, None, **kwargs)
        return space


    def new_analysis(self, name, description=None, sample=None, dss_code=None, result_files=None,
                     notebook_files=None, parents=None):

        """ An analysis contains the Jupyter notebook file(s) and some result files.
            Technically this method involves uploading files to the session workspace
            and activating the dropbox aka dataset ingestion service "jupyter-uploader-api"

        :param name: stored as the NAME property of the container data set.
        :param description: stored as the DESCRIPTION property of the container.
        :param sample: sample identifier string or sample object; may be None.
        :param dss_code: datastore code; defaults to the code of the first datastore.
        :param result_files: files uploaded as a JUPYTER_RESULT data set.
        :param notebook_files: files uploaded as a notebook data set.
        :param parents: one parent object or a list of them; their ``permId``
            values are sent as parentIds.
        :return: the value of the second cell of the first response row if
            the first cell is 'OK'; the raw response if it does not have
            that shape; None if the first cell holds any other value.
        """

        if dss_code is None:
            dss_code = self.get_datastores()['code'][0]

        # if a sample identifier was given, use it as a string.
        # if a sample object was given, take its identifier
        # no sample at all is sent as None instead of being dereferenced
        sampleId = None if sample is None else self.sample_to_sample_id(sample)

        parentIds = []
        if parents is not None:
            if not isinstance(parents, list):
                # the wrapped value used to be assigned to a misspelled name,
                # so a single parent object was never turned into a list
                parents = [parents]
            parentIds = [parent.permId for parent in parents]

        datastore_url = self._get_dss_url(dss_code)
        folder = time.strftime('%Y-%m-%d_%H-%M-%S')

        # upload the files
        data_sets = []
        if notebook_files is not None:
            notebooks_folder = os.path.join(folder, 'notebook_files')
            self.upload_files(
                datastore_url=datastore_url,
                files=notebook_files,
                folder=notebooks_folder,
                wait_until_finished=True
            )
            data_sets.append({
                # NOTE(review): the trailing lower-case "k" looks like a typo,
                # but the value is sent to the server as-is - confirm against
                # the data set type registered there before changing it.
                "dataSetType": "JUPYTER_NOTEBOOk",
                "sessionWorkspaceFolder": notebooks_folder,
                "fileNames": notebook_files,
                "properties": {}
            })
        if result_files is not None:
            results_folder = os.path.join(folder, 'result_files')
            self.upload_files(
                datastore_url=datastore_url,
                files=result_files,
                folder=results_folder,
                wait_until_finished=True
            )
            data_sets.append({
                "dataSetType": "JUPYTER_RESULT",
                "sessionWorkspaceFolder": results_folder,
                "fileNames": result_files,
                "properties": {}
            })

        # register the files in openBIS
        request = {
            "method": "createReportFromAggregationService",
            "params": [
                self.token,
                dss_code,
                PYBIS_PLUGIN,
                {
                    "sampleId": sampleId,
                    "parentIds": parentIds,
                    "containers": [{
                        "dataSetType": "JUPYTER_CONTAINER",
                        "properties": {
                            "NAME": name,
                            "DESCRIPTION": description
                        }
                    }],
                    "dataSets": data_sets,
                }
            ],
        }

        resp = self._post_request(self.reg_v1, request)
        try:
            if resp['rows'][0][0]['value'] == 'OK':
                return resp['rows'][0][1]['value']
        except (KeyError, IndexError, TypeError):
            # response does not have the rows/cells shape: hand it back as is
            return resp

    def new_git_data_set(self, data_set_type, path, commit_id, repository_id, dms, sample=None, experiment=None, properties=None,
                         dss_code=None, parents=None, data_set_code=None, contents=None):
        """ Create a link data set.
        :param data_set_type: The type of the data set
        :param path: The path to the git repository
        :param commit_id: The git commit id
        :param repository_id: The git repository id - same for copies
        :param dms: An external data managment system object or external_dms_id
        :param sample: A sample object or sample id.
        :param experiment: Passed on unchanged to GitDataSetCreation
            (presumably an experiment object or id, mirroring ``sample`` - confirm).
        :param properties: Properties for the data set; defaults to an empty dict.
        :param dss_code: Code for the DSS -- defaults to the first dss if none is supplied.
        :param parents: Parents for the data set.
        :param data_set_code: A data set code -- used if provided, otherwise generated on the server
        :param contents: A list of dicts that describe the contents; defaults to an empty list:
            {'file_length': [file length],
             'crc32': [crc32 checksum],
             'directory': [is path a directory?]
             'path': [the relative path string]}
        :return: A DataSet object
        """
        # Fresh containers per call: dict/list literals as defaults would be
        # shared between calls and are handed to another object here.
        if properties is None:
            properties = {}
        if contents is None:
            contents = []
        return pbds.GitDataSetCreation(self, data_set_type, path, commit_id, repository_id, dms, sample, experiment,
                                       properties, dss_code, parents, data_set_code, contents).new_git_data_set()

    def new_content_copy(self, path, commit_id, repository_id, edms_id, data_set_id):
        """
        Create a content copy in an existing link data set.
        :param path: path of the new content copy
        :param commit_id: commit id of the new content copy
        :param repository_id: repository id of the content copy
        :param edms_id: Id of the external data managment system of the content copy
        :param data_set_id: Id of the data set to which the new content copy belongs
        """
        updater = pbds.GitDataSetUpdate(self, path, commit_id, repository_id, edms_id, data_set_id)
        return updater.new_content_copy()

    @staticmethod
    def sample_to_sample_id(sample):
        """Turn a sample (string or object) into an id dict via ``_object_to_object_id``."""
        identifier_type = "as.dto.sample.id.SampleIdentifier"
        perm_id_type = "as.dto.sample.id.SamplePermId"
        return Openbis._object_to_object_id(sample, identifier_type, perm_id_type)

    @staticmethod
    def experiment_to_experiment_id(experiment):
        """Take experiment which may be a string or object and return an identifier for it.

        Delegates to ``_object_to_object_id`` with the experiment identifier
        and experiment permId types.
        """
        # The permId type used to be "as.dto.experiment.id.SamplePermId",
        # a copy-paste of the sample variant; the experiment id package
        # provides ExperimentPermId.
        return Openbis._object_to_object_id(
            experiment,
            "as.dto.experiment.id.ExperimentIdentifier",
            "as.dto.experiment.id.ExperimentPermId"
        )

    @staticmethod
    def _object_to_object_id(obj, identifierType, permIdType):
        object_id = None
        if isinstance(obj, str):
            if (is_identifier(obj)):
                object_id = {
                    "identifier": obj,
                    "@type": identifierType
                }
            else:
                object_id = {
                    "permId": obj,
                    "@type": permIdType
                }
        else:
            object_id = {
                "identifier": obj.identifier,
                "@type": identifierType
            }
        return object_id

    @staticmethod
    def data_set_to_data_set_id(data_set):
        if isinstance(data_set, str):
            code = data_set
        else:
            code = data_set.permId
        return {
            "permId": code,
            "@type": "as.dto.dataset.id.DataSetPermId"
        }

    def external_data_managment_system_to_dms_id(self, dms):
        """Build the id dict for an external DMS.

        A string is sent as an ExternalDmsPermId; any other object
        contributes its ``code`` attribute as identifier.
        """
        if not isinstance(dms, str):
            # NOTE(review): the object branch emits a *sample* identifier type,
            # which looks like a copy-paste leftover - confirm the intended
            # type before relying on it.
            return {
                "identifier": dms.code,
                "@type": "as.dto.sample.id.SampleIdentifier"
            }
        return {
            "permId": dms,
            "@type": "as.dto.externaldms.id.ExternalDmsPermId"
        }

    def new_sample(self, type, props=None, **kwargs):
        """ Looks up the sample type with ``get_sample_type`` and builds a
        Sample object of that type without existing data; ``props`` and all
        keyword arguments are handed to the Sample constructor.
        """
        sample_type = self.get_sample_type(type)
        return Sample(self, sample_type, None, props, **kwargs)

    new_object = new_sample # Alias

    def new_dataset(self, type=None, files=None, props=None, folder=None, **kwargs):
        """ Creates a new dataset of a given dataset type.

        :param type: code of the dataset type; upper-cased before lookup.
        :param files: a single filename or a list of filenames.
        :param props: properties handed to the DataSet constructor.
        :param folder: folder handed to the DataSet constructor.
        :raises ValueError: if no files or no type are given, or if the
            dataset type lookup fails.
        """
        if files is None:
            raise ValueError('please provide at least one file')
        elif isinstance(files, str):
            files = [files]

        # type defaults to None in the signature; fail with a clear message
        # instead of an AttributeError on None.upper()
        if type is None:
            raise ValueError('please provide a dataset type')

        type_obj = self.get_dataset_type(type.upper())

        return DataSet(self, type=type_obj, files=files, folder=folder, props=props, **kwargs)
    
    def new_semantic_annotation(self, entityType=None, propertyType=None, **kwargs):
        """ Builds a SemanticAnnotation object flagged as new and bound to
        this connection; all keyword arguments are handed to its constructor.
        """
        annotation = SemanticAnnotation(
            openbis_obj=self,
            isNew=True,
            entityType=entityType,
            propertyType=propertyType,
            **kwargs
        )
        return annotation

    def _get_dss_url(self, dss_code=None):
        """ internal method to get the downloadURL of a datastore.
        """
        dss = self.get_datastores()
        if dss_code is None:
            return dss['downloadUrl'][0]
        else:
            return dss[dss['code'] == dss_code]['downloadUrl'][0]

    def upload_files(self, datastore_url=None, files=None, folder=None, wait_until_finished=False):
        """Queue files for upload into the session workspace of a datastore.

        :param datastore_url: base URL of the datastore; defaults to the
            result of ``_get_dss_url()``.
        :param files: a filename or a list of filenames; directories are
            walked and every file below them is uploaded.
        :param folder: target folder in the session workspace; defaults to
            a timestamp-based name.
        :param wait_until_finished: if true, block until the upload queue
            has processed all files.
        :return: list of the workspace paths (folder joined with filename)
            of the files queued by this call; also stored in
            ``self.files_in_wsp``.
        :raises ValueError: if no files are given.
        """

        if datastore_url is None:
            datastore_url = self._get_dss_url()

        if files is None:
            raise ValueError("Please provide a filename.")

        if folder is None:
            # create a unique foldername
            folder = time.strftime('%Y-%m-%d_%H-%M-%S')

        if isinstance(files, str):
            files = [files]

        self.files = files
        self.startByte = 0
        self.endByte = 0

        # define a queue to handle the upload threads
        queue = DataSetUploadQueue()

        real_files = []
        for filename in files:
            if os.path.isdir(filename):
                real_files.extend(
                    [os.path.join(dp, f) for dp, dn, fn in os.walk(os.path.expanduser(filename)) for f in fn])
            else:
                real_files.append(os.path.join(filename))

        # Start from an empty list on every call: appending to whatever
        # self.files_in_wsp held before would mix in the files of earlier
        # uploads (or fail if the attribute was never created).
        files_in_wsp = []

        # compose the upload-URL and put URL and filename in the upload queue
        for filename in real_files:
            files_in_wsp.append(os.path.join(folder, filename))
            upload_url = (
                datastore_url + '/session_workspace_file_upload'
                + '?filename=' + os.path.join(folder, quote(filename))
                + '&id=1'
                + '&startByte=0&endByte=0'
                + '&sessionID=' + self.token
            )
            queue.put([upload_url, filename, self.verify_certificates])

        self.files_in_wsp = files_in_wsp

        # wait until all files have uploaded
        if wait_until_finished:
            queue.join()

        # return files with full path in session workspace
        return files_in_wsp


class DataSetUploadQueue():
    """Queue served by a pool of daemon threads that POST files to upload URLs.

    Uploads that fail are collected in ``errors`` as ``(filename, exception)``
    tuples, so that ``join()`` always returns and callers can inspect what
    went wrong.
    """

    def __init__(self, workers=20):
        # maximum files to be uploaded at once
        self.upload_queue = Queue()

        # (filename, exception) for every upload that failed
        self.errors = []

        # define number of threads and start them
        for _ in range(workers):
            worker = Thread(target=self.upload_file)
            worker.daemon = True
            worker.start()

    def put(self, things):
        """ expects a list [url, filename, verify_certificates] which is put
        into the upload queue
        """
        self.upload_queue.put(things)

    def join(self):
        """ needs to be called if you want to wait for all uploads to be finished
        """
        self.upload_queue.join()

    def upload_file(self):
        """Worker loop: take items from the queue and upload them, forever."""
        import traceback

        while True:
            # get the next item in the queue
            item = self.upload_queue.get()
            filename = None
            try:
                upload_url, filename, verify_certificates = item
                filesize = os.path.getsize(filename)

                # upload the file to our DSS session workspace
                with open(filename, 'rb') as f:
                    resp = requests.post(upload_url, data=f, verify=verify_certificates)
                    resp.raise_for_status()
                    data = resp.json()

                # explicit check instead of assert, which is stripped under -O
                if filesize != int(data['size']):
                    raise ValueError(
                        "size mismatch after upload of {}: local {} bytes, server reported {}".format(
                            filename, filesize, data['size']
                        )
                    )
            except Exception as exc:
                # keep the worker alive and the failure visible
                traceback.print_exc()
                self.errors.append((filename, exc))
            finally:
                # Tell the queue that we are done - also after a failure,
                # otherwise join() would block forever
                self.upload_queue.task_done()


class DataSetDownloadQueue():
    """Queue served by a pool of daemon threads that download URLs to files.

    Downloads that fail are collected in ``errors`` as
    ``(filename, exception)`` tuples, so that ``join()`` always returns and
    callers can inspect what went wrong.
    """

    def __init__(self, workers=20):
        # maximum files to be downloaded at once
        self.download_queue = Queue()

        # (filename, exception) for every download that failed
        self.errors = []

        # define number of threads
        for _ in range(workers):
            worker = Thread(target=self.download_file)
            worker.daemon = True
            worker.start()

    def put(self, things):
        """ expects a list [url, filename, file_size, verify_certificates]
        which is put into the download queue
        """
        self.download_queue.put(things)

    def join(self):
        """ needs to be called if you want to wait for all downloads to be finished
        """
        self.download_queue.join()

    def download_file(self):
        """Worker loop: take items from the queue and download them, forever."""
        import traceback

        while True:
            item = self.download_queue.get()
            filename = None
            try:
                url, filename, file_size, verify_certificates = item

                # create the necessary directory structure if they don't exist yet
                # (a bare filename has no directory part; makedirs('') would fail)
                target_dir = os.path.dirname(filename)
                if target_dir:
                    os.makedirs(target_dir, exist_ok=True)

                # request the file in streaming mode
                r = requests.get(url, stream=True, verify=verify_certificates)
                with open(filename, 'wb') as f:
                    for chunk in r.iter_content(chunk_size=1024):
                        if chunk:  # filter out keep-alive new chunks
                            f.write(chunk)

                # explicit check instead of assert, which is stripped under -O
                actual_size = os.path.getsize(filename)
                if actual_size != int(file_size):
                    raise ValueError(
                        "size mismatch after download of {}: got {} bytes, expected {}".format(
                            filename, actual_size, file_size
                        )
                    )
            except Exception as exc:
                # keep the worker alive and the failure visible
                traceback.print_exc()
                self.errors.append((filename, exc))
            finally:
                # mark the item as done also after a failure, otherwise
                # join() would block forever
                self.download_queue.task_done()


class OpenBisObject():
    """Base wrapper around an openBIS entity.

    The instance ``__dict__`` holds the connection (``openbis``), the entity
    type (``type``), a PropertyHolder (``p``) and an AttrHolder (``a``).
    Attribute reads that are not found on the instance, and all attribute
    writes, are delegated to the AttrHolder.
    """

    def __init__(self, openbis_obj, type, data=None, props=None, **kwargs):
        """
        :param openbis_obj: the connection object.
        :param type: the entity type object.
        :param data: server data of an existing object, if any.
        :param props: dict of properties, set one by one on ``self.p``.
        :param kwargs: further attributes, set one by one on this object.
        """
        self.__dict__['openbis'] = openbis_obj
        self.__dict__['type'] = type
        self.__dict__['p'] = PropertyHolder(openbis_obj, type)
        self.__dict__['a'] = AttrHolder(openbis_obj, 'DataSet', type)

        # existing OpenBIS object
        if data is not None:
            self._set_data(data)

        if props is not None:
            for key in props:
                setattr(self.p, key, props[key])

        for key in kwargs:
            setattr(self, key, kwargs[key])

    def __eq__(self, other):
        return str(self) == str(other)

    def __ne__(self, other):
        return str(self) != str(other)

    def _set_data(self, data):
        # assign the attribute data to self.a by calling it
        # (invoking the AttrHolder.__call__ function)
        self.a(data)
        self.__dict__['data'] = data

        # put the properties in the self.p namespace (without checking them)
        if 'properties' in data:
            for key, value in data['properties'].items():
                self.p.__dict__[key.lower()] = value

    @property
    def attrs(self):
        return self.__dict__['a']

    # The four lookups below are best-effort: any failure (attribute not
    # set, object not found, ...) yields None instead of an exception.

    @property
    def space(self):
        try:
            return self.openbis.get_space(self._space['permId'])
        except Exception:
            return None

    @property
    def project(self):
        try:
            return self.openbis.get_project(self._project['identifier'])
        except Exception:
            return None

    @property
    def experiment(self):
        try:
            return self.openbis.get_experiment(self._experiment['identifier'])
        except Exception:
            return None

    @property
    def sample(self):
        try:
            return self.openbis.get_sample(self._sample['identifier'])
        except Exception:
            return None

    object = sample # Alias

    def __getattr__(self, name):
        # Only called when normal lookup fails. Without an attribute holder
        # (e.g. on an instance created by copy/pickle before its state is
        # restored) this must raise AttributeError - not KeyError - so that
        # hasattr() and the copy/pickle protocols keep working.
        try:
            holder = self.__dict__['a']
        except KeyError:
            raise AttributeError(name) from None
        return getattr(holder, name)

    def __setattr__(self, name, value):
        if name in ['set_properties', 'set_tags', 'add_tags']:
            raise ValueError("These are methods which should not be overwritten")
        setattr(self.__dict__['a'], name, value)

    def _repr_html_(self):
        """Print all the assigned attributes (identifier, tags, etc.) in a nicely formatted table. See
        AttributeHolder class.
        """
        return self.a._repr_html_()

    def __repr__(self):
        """same thing as _repr_html_() but for IPython
        """
        return self.a.__repr__()


class LinkedData():
    """Read-only view on the ``linkedData`` part of a data set.

    The attributes listed in ``attrs`` are looked up in ``data``; a listed
    attribute that is missing from ``data`` yields None, any other
    (non-dunder) attribute name yields ''.
    """

    def __init__(self, data=None):
        self.data = data if data is not None else []
        self.attrs = ['externalCode', 'contentCopies']

    def __dir__(self):
        return self.attrs

    def __getattr__(self, name):
        # Only called when normal lookup fails. 'attrs'/'data' missing means
        # the instance is not initialised (e.g. while being copied or
        # unpickled); answering here would recurse endlessly. Dunder names
        # are protocol probes (__deepcopy__, __setstate__, ...) and must
        # report "not there" instead of the '' fallback.
        if name in ('attrs', 'data') or (name.startswith('__') and name.endswith('__')):
            raise AttributeError(name)

        if name in self.attrs:
            if name in self.data:
                return self.data[name]
            return None
        return ''


class PhysicalData():
    """Read-only view on the ``physicalData`` part of a data set.

    The attributes listed in ``attrs`` are looked up in ``data``; a listed
    attribute that is missing from ``data`` yields None, any other
    (non-dunder) attribute name yields ''.
    """

    def __init__(self, data=None):
        if data is None:
            data = []
        self.data = data
        self.attrs = ['speedHint', 'complete', 'shareId', 'size',
                      'fileFormatType', 'storageFormat', 'location', 'presentInArchive',
                      'storageConfirmation', 'locatorType', 'status']

    def __dir__(self):
        return self.attrs

    def __getattr__(self, name):
        # Only called when normal lookup fails. 'attrs'/'data' missing means
        # the instance is not initialised (e.g. while being copied or
        # unpickled); answering here would recurse endlessly. Dunder names
        # are protocol probes (__deepcopy__, __setstate__, ...) and must
        # report "not there" instead of the '' fallback.
        if name in ('attrs', 'data') or (name.startswith('__') and name.endswith('__')):
            raise AttributeError(name)

        if name in self.attrs:
            if name in self.data:
                return self.data[name]
            return None
        return ''

    def _repr_html_(self):
        """Render all attributes and their values as an HTML table."""
        html = """
            <table border="1" class="dataframe">
            <thead>
                <tr style="text-align: right;">
                <th>attribute</th>
                <th>value</th>
                </tr>
            </thead>
            <tbody>
        """

        for attr in self.attrs:
            html += "<tr> <td>{}</td> <td>{}</td> </tr>".format(
                attr, getattr(self, attr, '')
            )

        html += """
            </tbody>
            </table>
        """
        return html

    def __repr__(self):
        """Render all attributes and their values as a plain-text table."""

        headers = ['attribute', 'value']
        lines = []
        for attr in self.attrs:
            lines.append([
                attr,
                getattr(self, attr, '')
            ])
        return tabulate(lines, headers=headers)


class DataSet(OpenBisObject):
    """ DataSet are openBIS objects that contain the actual files.
    """

    def __init__(self, openbis_obj, type=type, data=None, files=None, folder=None, props=None, **kwargs):
        super(DataSet, self).__init__(openbis_obj, type, data, props, **kwargs)

        # existing DataSet
        if data is not None:
            if data['physicalData'] is None:
                self.__dict__['shareId'] = None
                self.__dict__['location'] = None
            else:
                self.__dict__['shareId'] = data['physicalData']['shareId']
                self.__dict__['location'] = data['physicalData']['location']
        
        # new DataSet
        if files is not None:
            self.__dict__['files'] = files

        self.__dict__['folder'] = folder


    def __str__(self):
        return self.data['code']

    def __dir__(self):
        return [
            'props', 'get_parents()', 'get_children()', 
            'add_parents()', 'add_children()', 'del_parents()', 'del_children()',
            'sample', 'experiment', 'physicalData', 'linkedData',
            'tags', 'set_tags()', 'add_tags()', 'del_tags()',
            'add_attachment()', 'get_attachments()', 'download_attachments()',
            "get_files(start_folder='/')", 'file_list',
            'download(files=None, destination=None, wait_until_finished=True)', 
            'status', 'archive()', 'unarchive()', 'data'
        ]

    def __setattr__(self, name, value):
        if name in ['folder']:
            self.__dict__[name] = value
        else:
            super(DataSet, self).__setattr__(name, value)

    @property
    def props(self):
        return self.__dict__['p']

    @property
    def type(self):
        return self.__dict__['type']

    @type.setter
    def type(self, type_name):
        dataset_type = self.openbis.get_dataset_type(type_name.upper())
        self.p.__dict__['_type'] = dataset_type
        self.a.__dict__['_type'] = dataset_type

    @property
    def physicalData(self):
        if 'physicalData' in self.data:
            return PhysicalData(self.data['physicalData'])

    @property