pybis.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
pybis.py

Work with openBIS from Python.

"""

import os
import requests
from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

import time
from datetime import datetime
import json
import re
from urllib.parse import urlparse
import zlib
from collections import namedtuple


import pandas as pd
from pandas import DataFrame, Series

import threading
from threading import Thread
from queue import Queue
DROPBOX_PLUGIN = "jupyter-uploader-api"

search_for_type = {
    "space":      "as.dto.space.search.SpaceSearchCriteria",
    "project":    "as.dto.project.search.ProjectSearchCriteria",
    "experiment": "as.dto.experiment.search.ExperimentSearchCriteria",
    "code":       "as.dto.common.search.CodeSearchCriteria",
    "sample_type":"as.dto.sample.search.SampleTypeSearchCriteria",
}

fetch_option = {
    "space":        { "@type": "as.dto.space.fetchoptions.SpaceFetchOptions" },
    "project":      { "@type": "as.dto.project.fetchoptions.ProjectFetchOptions" },
    "experiment":   { "@type": "as.dto.experiment.fetchoptions.ExperimentFetchOptions" },
    "sample":       { "@type": "as.dto.sample.fetchoptions.SampleFetchOptions" },
    "dataset":      { "@type": "as.dto.dataset.fetchoptions.DataSetFetchOptions" },
    "physicalData": { "@type": "as.dto.dataset.fetchoptions.PhysicalDataFetchOptions" },
    "linkedData":   { "@type": "as.dto.dataset.fetchoptions.LinkedDataFetchOptions" },


    "properties":   { "@type": "as.dto.property.fetchoptions.PropertyFetchOptions" },
    "tags":         { "@type": "as.dto.tag.fetchoptions.TagFetchOptions" },

    "registrator":  { "@type": "as.dto.person.fetchoptions.PersonFetchOptions" },
    "modifier":     { "@type": "as.dto.person.fetchoptions.PersonFetchOptions" },
    "leader":       { "@type": "as.dto.person.fetchoptions.PersonFetchOptions" },

    "attachments":  { "@type": "as.dto.attachment.fetchoptions.AttachmentFetchOptions" },
    "history":      { "@type": "as.dto.history.fetchoptions.HistoryEntryFetchOptions" },
    "dataStore":    { "@type": "as.dto.datastore.fetchoptions.DataStoreFetchOptions" },
}


def parse_jackson(input_json):
    """openBIS uses a library called «jackson» to automatically generate the JSON RPC output.
       Objects that are found the first time are added an attribute «@id».
       Any further findings only carry this reference id.
       This function is used to dereference the output.
    """
    interesting=['tags', 'registrator', 'modifier', 'type', 'parents', 
        'children', 'containers', 'properties', 'experiment', 'sample',
        'project', 'space', 'propertyType'
    ]
    found = {} 
    def build_cache(graph):
        if isinstance(graph, list):
            for item in graph:
                build_cache(item)
        elif isinstance(graph, dict) and len(graph) > 0:
            for key, value in graph.items():
                if key in interesting:
                    if isinstance(value, dict):
                        if '@id' in value:
                            found[value['@id']] = value
                        build_cache(value)
                    elif isinstance(value, list):
                        for item in value:
                            if isinstance(item, dict):
                                if '@id' in item:
                                    found[item['@id']] = item
                                build_cache(item)
                elif isinstance(value, dict):
                    build_cache(value)
                elif isinstance(value, list):
                    build_cache(value)
                    
    def deref_graph(graph):            
        if isinstance(graph, list):
            for item in graph:
                deref_graph(item)
        elif isinstance(graph, dict) and len(graph) > 0:
            for key, value in graph.items():
                if key in interesting:
                    if isinstance(value, dict):
                        deref_graph(value)
                    elif isinstance(value, int):
                        graph[key] = found[value]
                    elif isinstance(value, list):
                        for i, list_item in enumerate(value):
                            if isinstance(list_item, int):
                                value[i] = found[list_item]
                elif isinstance(value, dict):
                    deref_graph(value)
                elif isinstance(value, list):
                    deref_graph(value)

    build_cache(input_json)
    deref_graph(input_json)

def check_datatype(type_name, value):
    if type_name == 'INTEGER':
        return isinstance(value, int)
    if type_name == 'BOOLEAN':
        return isinstance(value, bool)
    if type_name == 'VARCHAR':
        return isinstance(value, str)
    return True

def search_request_for_identifier(identifier, entity_type):
    
        search_request = {}
        # assume we got a sample identifier e.g. /TEST/TEST-SAMPLE
        match = re.match('/', identifier)
        if match:
            search_request = {
                "identifier": identifier.upper(),
                "@type": "as.dto.{}.id.{}Identifier".format(entity_type.lower(), entity_type.capitalize())
            }
        else:
            search_request = {
                "permId": identifier,
                "@type": "as.dto.{}.id.{}PermId".format(entity_type.lower(), entity_type.capitalize())
            }
        return search_request

def table_for_attributes(attributes):
    table = '<table border="1" class="dataframe"><thead><tr style="text-align: right;"> <th>attribute</th> <th>value</th> </tr> </thead><tbody>'

    for key, val in attributes.items():
        table += '<tr><th>{}</th><td>{}</td></tr>'.format(key, val)

    table += '</tbody></table>'
    return table

def format_timestamp(ts):
    return datetime.fromtimestamp(round(ts/1000)).strftime('%Y-%m-%d %H:%M:%S')

def extract_code(obj):
    return obj['code']

def extract_deletion(obj):
    del_objs = []
    for deleted_object in obj['deletedObjects']:
        del_objs.append({
            "reason": obj['reason'],
            "permId": deleted_object["id"]["permId"],
            "type": deleted_object["id"]["@type"]
        })
    return del_objs

def extract_identifier(ident):
    if not isinstance(ident, dict): 
        return str(ident)
    return ident['identifier']

def extract_nested_identifier(ident):
    if not isinstance(ident, dict): 
        return str(ident)
    return ident['identifier']['identifier']

def extract_permid(permid):
    if not isinstance(permid, dict):
        return str(permid)
    return permid['permId']

def extract_nested_permid(permid):
    if not isinstance(permid, dict):
        return str(permid)
    return permid['permId']['permId']

def extract_property_assignments(pas):
    pa_strings = []
    for pa in pas:
        if not isinstance(pa['propertyType'], dict):
            pa_strings.append(pa['propertyType'])
        else:
            pa_strings.append(pa['propertyType']['label'])
    return pa_strings