Skip to content
Snippets Groups Projects
pybis.py 126 KiB
Newer Older
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
pybis.py

Swen Vermeul's avatar
Swen Vermeul committed
Work with openBIS from Python.
import requests
from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

import json
import re
from urllib.parse import urlparse
import zlib
from texttable import Texttable
from tabulate import tabulate

from pybis.utils import parse_jackson, check_datatype, split_identifier, format_timestamp, is_identifier, is_permid, nvl
from pybis.property import PropertyHolder, PropertyAssignments
from pybis.masterdata import Vocabulary
import pandas as pd
from pandas import DataFrame, Series

import threading
from threading import Thread
from queue import Queue
PYBIS_PLUGIN = "jupyter-uploader-api"
def _definitions(entity):
        "Space": {
            "attrs_new": "code description".split(),
            "attrs_up": "description".split(),
            "attrs": "code permId description registrator registrationDate modificationDate".split(),
            "identifier": "spaceId",
        },
        "Project": {
            "attrs_new": "code description space attachments".split(),
            "attrs_up": "description space attachments".split(),
            "attrs": "code description permId identifier space leader registrator registrationDate modifier modificationDate attachments".split(),
            "multi": "".split(),
            "identifier": "projectId",
        },
        "Experiment": {
            "attrs_new": "code type project tags attachments".split(),
            "attrs_up": "project tags attachments".split(),
            "attrs": "code permId identifier type project tags attachments".split(),
            "multi": "tags attachments".split(),
            "identifier": "experimentId",
        },
            "attrs_new": "code type parents children space experiment tags attachments".split(),
            "attrs_up": "parents children space experiment tags attachments".split(),
            "attrs": "code permId identifier type parents children components space experiment tags attachments".split(),
                'parentIds': {'permId': {'@type': 'as.dto.sample.id.SamplePermId'}},
                'childIds': {'permId': {'@type': 'as.dto.sample.id.SamplePermId'}},
                'componentIds': {'permId': {'@type': 'as.dto.sample.id.SamplePermId'}},
            },
            "identifier": "sampleId",
            "cre_type": "as.dto.sample.create.SampleCreation",
            "multi": "parents children components tags attachments".split(),
            "attrs_new": "type experiment sample parents children container components tags".split(),
            "attrs_up": "parents children experiment sample container components tags".split(),
            "attrs": "code permId type experiment sample parents children container components tags accessDate dataProducer dataProductionDate registrator registrationDate modifier modificationDate dataStore measured".split(),
                'parentIds': {'permId': {'@type': 'as.dto.dataset.id.DataSetPermId'}},
                'childIds': {'permId': {'@type': 'as.dto.dataset.id.DataSetPermId'}},
                'componentIds': {'permId': {'@type': 'as.dto.dataset.id.DataSetPermId'}},
                'containerIds': {'permId': {'@type': 'as.dto.dataset.id.DataSetPermId'}},
            "multi": "parents children container".split(),
        "Material": {
            "attrs_new": "code description type creation tags".split(),
            "attrs": "code description type creation registrator tags".split()
        },
        "Tag": {
            "attrs_new": "code description experiments samples dataSets materials".split(),
            "attrs": "code description experiments samples dataSets materials registrationDate".split(),
        },
            "space": "spaceId",
            "project": "projectId",
            "sample": "sampleId",
            "samples": "sampleIds",
            "dataSet": "dataSetId",
            "dataSets": "dataSetIds",
            "experiment": "experimentId",
            "experiments": "experimentIds",
            "material": "materialId",
            "materials": "materialIds",
            "container": "containerId",
            "component": "componentId",
            "components": "componentIds",
            "parents": "parentIds",
            "children": "childIds",
            "tags": "tagIds",
            'spaceId': {'permId': {'@type': 'as.dto.space.id.SpacePermId'}},
            'projectId': {'permId': {'@type': 'as.dto.project.id.ProjectPermId'}},
            'experimentId': {'permId': {'@type': 'as.dto.experiment.id.ExperimentPermId'}},
            'tagIds': {'code': {'@type': 'as.dto.tag.id.TagCode'}},
    return entities[entity]
search_criteria = {
    "space": "as.dto.space.search.SpaceSearchCriteria",
    "project": "as.dto.project.search.ProjectSearchCriteria",
    "experiment": "as.dto.experiment.search.ExperimentSearchCriteria",
    "sample": "as.dto.sample.search.SampleSearchCriteria",
    "dataset": "as.dto.dataset.search.DataSetSearchCriteria",
    "code": "as.dto.common.search.CodeSearchCriteria",
    "sample_type": "as.dto.sample.search.SampleTypeSearchCriteria",
Swen Vermeul's avatar
Swen Vermeul committed
fetch_option = {
    "space": {"@type": "as.dto.space.fetchoptions.SpaceFetchOptions"},
    "project": {"@type": "as.dto.project.fetchoptions.ProjectFetchOptions"},
    "experiment": {
        "@type": "as.dto.experiment.fetchoptions.ExperimentFetchOptions",
        "type": {"@type": "as.dto.experiment.fetchoptions.ExperimentTypeFetchOptions"}
        "@type": "as.dto.sample.fetchoptions.SampleFetchOptions",
        "type": {"@type": "as.dto.sample.fetchoptions.SampleTypeFetchOptions"}
    "samples": {"@type": "as.dto.sample.fetchoptions.SampleFetchOptions"},
    "dataSets": {
        "@type": "as.dto.dataset.fetchoptions.DataSetFetchOptions",
        "properties": {"@type": "as.dto.property.fetchoptions.PropertyFetchOptions"},
        "type": {"@type": "as.dto.dataset.fetchoptions.DataSetTypeFetchOptions"},
    "physicalData": {"@type": "as.dto.dataset.fetchoptions.PhysicalDataFetchOptions"},
    "linkedData": {
        "externalDms": {"@type": "as.dto.externaldms.fetchoptions.ExternalDmsFetchOptions"},
        "@type": "as.dto.dataset.fetchoptions.LinkedDataFetchOptions"
    },
    "properties": {"@type": "as.dto.property.fetchoptions.PropertyFetchOptions"},
    "propertyAssignments": {
        "@type": "as.dto.property.fetchoptions.PropertyAssignmentFetchOptions",
        "propertyType": {
            "@type": "as.dto.property.fetchoptions.PropertyTypeFetchOptions"
        }
    },
    "tags": {"@type": "as.dto.tag.fetchoptions.TagFetchOptions"},
    "registrator": {"@type": "as.dto.person.fetchoptions.PersonFetchOptions"},
    "modifier": {"@type": "as.dto.person.fetchoptions.PersonFetchOptions"},
    "leader": {"@type": "as.dto.person.fetchoptions.PersonFetchOptions"},
    "attachments": {"@type": "as.dto.attachment.fetchoptions.AttachmentFetchOptions"},
    "attachmentsWithContent": {
        "@type": "as.dto.attachment.fetchoptions.AttachmentFetchOptions",
        "content": {
            "@type": "as.dto.common.fetchoptions.EmptyFetchOptions"
        },
    },
    "history": {"@type": "as.dto.history.fetchoptions.HistoryEntryFetchOptions"},
    "dataStore": {"@type": "as.dto.datastore.fetchoptions.DataStoreFetchOptions"},
def search_request_for_identifier(ident, entity):
        search_request = {
            "identifier": ident.upper(),
            "@type": "as.dto.{}.id.{}Identifier".format(entity.lower(), entity.capitalize())
            "@type": "as.dto.{}.id.{}PermId".format(entity.lower(), entity.capitalize())
def extract_code(obj):
Swen Vermeul's avatar
Swen Vermeul committed
    if not isinstance(obj, dict):
        return str(obj)
def extract_deletion(obj):
    del_objs = []
    for deleted_object in obj['deletedObjects']:
        del_objs.append({
            "reason": obj['reason'],
            "permId": deleted_object["id"]["permId"],
            "type": deleted_object["id"]["@type"]
        })
    return del_objs

def extract_identifier(ident):
    if not isinstance(ident, dict):
        return str(ident)
    return ident['identifier']

def extract_nested_identifier(ident):
    if not isinstance(ident, dict):
        return str(ident)
    return ident['identifier']['identifier']

Swen Vermeul's avatar
Swen Vermeul committed
def extract_permid(permid):
    if not isinstance(permid, dict):
        return str(permid)
    return permid['permId']

Swen Vermeul's avatar
Swen Vermeul committed
def extract_nested_permid(permid):
    if not isinstance(permid, dict):
        return str(permid)
    return permid['permId']['permId']

def extract_property_assignments(pas):
    pa_strings = []
    for pa in pas:
        if not isinstance(pa['propertyType'], dict):
            pa_strings.append(pa['propertyType'])
        else:
            pa_strings.append(pa['propertyType']['label'])
    return pa_strings

def extract_person(person):
Swen Vermeul's avatar
Swen Vermeul committed
    if not isinstance(person, dict):
        return str(person)
Swen Vermeul's avatar
Swen Vermeul committed
    return person['userId']
def crc32(fileName):
    """since Python3 the zlib module returns unsigned integers (2.7: signed int)
    """
    prev = 0
    for eachLine in open(fileName, "rb"):
        prev = zlib.crc32(eachLine, prev)
    # return as hex

def _create_tagIds(tags=None):
    if tags is None:
        return None
    if not isinstance(tags, list):
        tags = [tags]
    tagIds = []
    for tag in tags:
        tagIds.append({"code": tag, "@type": "as.dto.tag.id.TagCode"})
def _tagIds_for_tags(tags=None, action='Add'):
    """creates an action item to add or remove tags. Action is either 'Add', 'Remove' or 'Set'
    """
    if tags is None:
        return
    if not isinstance(tags, list):
        tags = [tags]

    items = []
    for tag in tags:
        items.append({
            "code": tag,
            "@type": "as.dto.tag.id.TagCode"
        })

    tagIds = {
        "actions": [
            {
                "items": items,
                "@type": "as.dto.common.update.ListUpdateAction{}".format(action.capitalize())
            }
        ],
        "@type": "as.dto.common.update.IdListUpdateValue"
    }
    return tagIds

def _list_update(ids=None, entity=None, action='Add'):
    """creates an action item to add, set or remove ids. 
    """
    if ids is None:
        return
    if not isinstance(ids, list):
        ids = [ids]

    items = []
    for ids in ids:
        items.append({
            "code": ids,
            "@type": "as.dto.{}.id.{}Code".format(entity.lower(), entity)
        })

    list_update = {
        "actions": [
            {
                "items": items,
                "@type": "as.dto.common.update.ListUpdateAction{}".format(action.capitalize())
            }
        ],
        "@type": "as.dto.common.update.IdListUpdateValue"
    }

def _create_typeId(type):
    return {
        "permId": type.upper(),
        "@type": "as.dto.entitytype.id.EntityTypePermId"
    }


def _create_projectId(ident):
    match = re.match('/', ident)
    if match:
        return {
            "identifier": ident,
            "@type": "as.dto.project.id.ProjectIdentifier"
        }
    else:
            "permId": ident,
            "@type": "as.dto.project.id.ProjectPermId"
        }

def _create_experimentId(ident):
    return {
        "identifier": ident,
        "@type": "as.dto.experiment.id.ExperimentIdentifier"
    }


def _common_search(search_type, value, comparison="StringEqualToValue"):
    sreq = {
            "@type": "as.dto.common.search.{}".format(comparison)
        }
    }
def _criteria_for_code(code):
    return {
        "fieldValue": {
            "@type": "as.dto.common.search.StringEqualToValue"
        },
        "@type": "as.dto.common.search.CodeSearchCriteria"
    }

def _subcriteria_for_type(code, entity):
        "@type": "as.dto.{}.search.{}TypeSearchCriteria".format(entity.lower(), entity),
                "@type": "as.dto.common.search.CodeSearchCriteria",
                "fieldValue": {
                    "value": code.upper(),
                    "@type": "as.dto.common.search.StringEqualToValue"
                }
def _subcriteria_for_status(status_value):
    status_value = status_value.upper()
    valid_status = "AVAILABLE LOCKED ARCHIVED UNARCHIVE_PENDING ARCHIVE_PENDING BACKUP_PENDING".split()
    if not status_value in valid_status:
        raise ValueError("status must be one of the following: " + ", ".join(valid_status))

    return {
        "@type": "as.dto.dataset.search.PhysicalDataSearchCriteria",
        "operator": "AND",
        "criteria": [{
            "@type":
                "as.dto.dataset.search.StatusSearchCriteria",
            "fieldName": "status",
            "fieldType": "ATTRIBUTE",
def _gen_search_criteria(req):
    sreq = {}
    for key, val in req.items():
        if key == "criteria":
            items = []
            for item in req['criteria']:
                items.append(_gen_search_criteria(item))
            sreq['criteria'] = items
        elif key == "code":
            sreq["criteria"] = [_common_search(
                "as.dto.common.search.CodeSearchCriteria", val.upper()
            )]
        elif key == "identifier":
Swen Vermeul's avatar
Loading
Loading full blame...