Newer
Older
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
pybis.py
"""
Swen Vermeul
committed
from __future__ import print_function
import os
Chandrasekhar Ramakrishnan
committed
import random
from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
Swen Vermeul
committed
import copy
Swen Vermeul
committed
import time
Swen Vermeul
committed
import base64
from collections import namedtuple
from texttable import Texttable
from tabulate import tabulate
from pybis.utils import parse_jackson, check_datatype, split_identifier, format_timestamp, is_identifier, is_permid, nvl
from pybis.property import PropertyHolder, PropertyAssignments
from pybis.masterdata import Vocabulary
Chandrasekhar Ramakrishnan
committed
from . import data_set as pbds
import pandas as pd
from pandas import DataFrame, Series
Swen Vermeul
committed
import threading
from threading import Thread
from queue import Queue
Chandrasekhar Ramakrishnan
committed
from datetime import datetime
PYBIS_PLUGIN = "jupyter-uploader-api"
# Static lookup table describing the entity kinds handled by this module.
#
# Top-level keys are either an entity name ("Space", "Project", "Experiment",
# "Sample", "DataSet", "Material", "Tag") or one of the two shared tables
# "attr2ids" / "ids2type".
#
# NOTE(review): the meaning of the per-entity keys is inferred from their
# names and should be confirmed against the code that consumes this table:
#   attrs_new  - presumably the attributes accepted when creating an entity
#   attrs_up   - presumably the attributes that may be changed on update
#   attrs      - presumably every attribute exposed for the entity
#   multi      - presumably the attributes that hold several values
#   identifier - name of the id field used for that entity
#   ids2type   - per id field, the "@type" of the id object
#   cre_type   - "@type" of the creation object (only given for "Sample")
entities = {
    "Space": {
        "attrs_new": "code description".split(),
        "attrs_up": "description".split(),
        "attrs": "code permId description registrator registrationDate modificationDate".split(),
        "identifier": "spaceId",
    },
    "Project": {
        "attrs_new": "code description space attachments".split(),
        "attrs_up": "description space attachments".split(),
        "attrs": "code description permId identifier space leader registrator registrationDate modifier modificationDate attachments".split(),
        "multi": "".split(),
        "identifier": "projectId",
    },
    "Experiment": {
        "attrs_new": "code type project tags attachments".split(),
        "attrs_up": "project tags attachments".split(),
        "attrs": "code permId identifier type project tags attachments".split(),
        "multi": "tags attachments".split(),
        "identifier": "experimentId",
    },
    "Sample": {
        "attrs_new": "code type parents children space experiment tags attachments".split(),
        "attrs_up": "parents children space experiment tags attachments".split(),
        "attrs": "code permId identifier type parents children components space experiment tags attachments".split(),
        "ids2type": {
            'parentIds': {'permId': {'@type': 'as.dto.sample.id.SamplePermId'}},
            'childIds': {'permId': {'@type': 'as.dto.sample.id.SamplePermId'}},
            'componentIds': {'permId': {'@type': 'as.dto.sample.id.SamplePermId'}},
        },
        "identifier": "sampleId",
        "cre_type": "as.dto.sample.create.SampleCreation",
        "multi": "parents children components tags attachments".split(),
    },
    "DataSet": {
        "attrs_new": "type experiment sample parents children container components tags".split(),
        "attrs_up": "parents children experiment sample container components tags".split(),
        "attrs": "code permId type experiment sample parents children container components tags accessDate dataProducer dataProductionDate registrator registrationDate modifier modificationDate dataStore measured".split(),
        "ids2type": {
            'parentIds': {'permId': {'@type': 'as.dto.dataset.id.DataSetPermId'}},
            'childIds': {'permId': {'@type': 'as.dto.dataset.id.DataSetPermId'}},
            'componentIds': {'permId': {'@type': 'as.dto.dataset.id.DataSetPermId'}},
            'containerIds': {'permId': {'@type': 'as.dto.dataset.id.DataSetPermId'}},
        },
        "multi": "parents children container".split(),
        "identifier": "dataSetId",
    },
    "Material": {
        "attrs_new": "code description type creation tags".split(),
        "attrs": "code description type creation registrator tags".split()
    },
    "Tag": {
        "attrs_new": "code description experiments samples dataSets materials".split(),
        "attrs": "code description experiments samples dataSets materials registrationDate".split(),
    },
    # Maps an attribute name to the name of the corresponding id field.
    "attr2ids": {
        "space": "spaceId",
        "project": "projectId",
        "sample": "sampleId",
        "samples": "sampleIds",
        "dataSet": "dataSetId",
        "dataSets": "dataSetIds",
        "experiment": "experimentId",
        "material": "materialId",
        "materials": "materialIds",
        "container": "containerId",
        "component": "componentId",
        "components": "componentIds",
        "parents": "parentIds",
        "children": "childIds",
        "tags": "tagIds",
    },
    # Entity-independent id fields and the "@type" of their id objects.
    "ids2type": {
        'spaceId': {'permId': {'@type': 'as.dto.space.id.SpacePermId'}},
        'projectId': {'permId': {'@type': 'as.dto.project.id.ProjectPermId'}},
        'experimentId': {'permId': {'@type': 'as.dto.experiment.id.ExperimentPermId'}},
        'tagIds': {'code': {'@type': 'as.dto.tag.id.TagCode'}},
    },
}
"space": "as.dto.space.search.SpaceSearchCriteria",
"project": "as.dto.project.search.ProjectSearchCriteria",
"experiment": "as.dto.experiment.search.ExperimentSearchCriteria",
"sample": "as.dto.sample.search.SampleSearchCriteria",
"dataset": "as.dto.dataset.search.DataSetSearchCriteria",
"code": "as.dto.common.search.CodeSearchCriteria",
"sample_type": "as.dto.sample.search.SampleTypeSearchCriteria",
"space": {"@type": "as.dto.space.fetchoptions.SpaceFetchOptions"},
"project": {"@type": "as.dto.project.fetchoptions.ProjectFetchOptions"},
"experiment": {
"@type": "as.dto.experiment.fetchoptions.ExperimentFetchOptions",
"type": {"@type": "as.dto.experiment.fetchoptions.ExperimentTypeFetchOptions"}
"sample": {
"@type": "as.dto.sample.fetchoptions.SampleFetchOptions",
"type": {"@type": "as.dto.sample.fetchoptions.SampleTypeFetchOptions"}
"samples": {"@type": "as.dto.sample.fetchoptions.SampleFetchOptions"},
"dataSets": {
Swen Vermeul
committed
"@type": "as.dto.dataset.fetchoptions.DataSetFetchOptions",
"properties": {"@type": "as.dto.property.fetchoptions.PropertyFetchOptions"},
"type": {"@type": "as.dto.dataset.fetchoptions.DataSetTypeFetchOptions"},
Swen Vermeul
committed
},
"physicalData": {"@type": "as.dto.dataset.fetchoptions.PhysicalDataFetchOptions"},
"linkedData": {
"externalDms": {"@type": "as.dto.externaldms.fetchoptions.ExternalDmsFetchOptions"},
"@type": "as.dto.dataset.fetchoptions.LinkedDataFetchOptions"
},
"properties": {"@type": "as.dto.property.fetchoptions.PropertyFetchOptions"},
"propertyAssignments": {
"@type": "as.dto.property.fetchoptions.PropertyAssignmentFetchOptions",
Swen Vermeul
committed
"propertyType": {
"@type": "as.dto.property.fetchoptions.PropertyTypeFetchOptions"
}
},
"tags": {"@type": "as.dto.tag.fetchoptions.TagFetchOptions"},
"registrator": {"@type": "as.dto.person.fetchoptions.PersonFetchOptions"},
"modifier": {"@type": "as.dto.person.fetchoptions.PersonFetchOptions"},
"leader": {"@type": "as.dto.person.fetchoptions.PersonFetchOptions"},
"attachments": {"@type": "as.dto.attachment.fetchoptions.AttachmentFetchOptions"},
Swen Vermeul
committed
"attachmentsWithContent": {
"@type": "as.dto.attachment.fetchoptions.AttachmentFetchOptions",
"content": {
"@type": "as.dto.common.fetchoptions.EmptyFetchOptions"
},
},
"history": {"@type": "as.dto.history.fetchoptions.HistoryEntryFetchOptions"},
"dataStore": {"@type": "as.dto.datastore.fetchoptions.DataStoreFetchOptions"},
def search_request_for_identifier(ident, entity):
    """Build the id object used to look up a single entity.

    :param ident: identifier or permId of the entity.
    :param entity: entity name; it is lower-cased for the package part and
        passed through ``str.capitalize`` for the class part of ``@type``.
    :return: a dict with an ``identifier`` key (upper-cased) when
        ``is_identifier(ident)`` is true, otherwise a dict with a ``permId``
        key holding ``ident`` unchanged.
    """
    # NOTE(review): str.capitalize() lower-cases everything after the first
    # character, so "DataSet" becomes "Dataset" here -- confirm that callers
    # only pass names for which this yields the intended class name.
    if is_identifier(ident):
        return {
            "identifier": ident.upper(),
            "@type": "as.dto.{}.id.{}Identifier".format(entity.lower(), entity.capitalize())
        }
    return {
        "permId": ident,
        "@type": "as.dto.{}.id.{}PermId".format(entity.lower(), entity.capitalize())
    }
if not isinstance(obj, dict):
return str(obj)
def extract_deletion(obj):
    """Flatten one deletion record into a list with one dict per deleted object.

    Each resulting dict carries the record's ``reason`` together with the
    ``permId`` and ``@type`` (stored under ``type``) of the deleted object's id.
    """
    return [
        {
            "reason": obj['reason'],
            "permId": entry["id"]["permId"],
            "type": entry["id"]["@type"]
        }
        for entry in obj['deletedObjects']
    ]
def extract_identifier(ident):
    """Return ``ident['identifier']`` for a dict, else ``str(ident)``."""
    if isinstance(ident, dict):
        return ident['identifier']
    return str(ident)
def extract_nested_identifier(ident):
    """Return ``ident['identifier']['identifier']`` for a dict, else ``str(ident)``."""
    if isinstance(ident, dict):
        outer = ident['identifier']
        return outer['identifier']
    return str(ident)
def extract_permid(permid):
    """Return ``permid['permId']`` for a dict, else ``str(permid)``."""
    if isinstance(permid, dict):
        return permid['permId']
    return str(permid)
def extract_nested_permid(permid):
    """Return ``permid['permId']['permId']`` for a dict, else ``str(permid)``."""
    if isinstance(permid, dict):
        outer = permid['permId']
        return outer['permId']
    return str(permid)
def extract_property_assignments(pas):
    """Collect one entry per property assignment in ``pas``.

    For each assignment the ``propertyType`` value is taken as is when it is
    not a dict; when it is a dict its ``label`` is used instead.
    """
    labels = []
    for assignment in pas:
        property_type = assignment['propertyType']
        if isinstance(property_type, dict):
            labels.append(property_type['label'])
        else:
            labels.append(property_type)
    return labels
def extract_person(person):
"""since Python3 the zlib module returns unsigned integers (2.7: signed int)
"""
for eachLine in open(fileName, "rb"):
prev = zlib.crc32(eachLine, prev)
# return as hex
return "%x" % (prev & 0xFFFFFFFF)
def _create_tagIds(tags=None):
if tags is None:
return None
if not isinstance(tags, list):
tags = [tags]
tagIds.append({"code": tag, "@type": "as.dto.tag.id.TagCode"})
return tagIds
def _tagIds_for_tags(tags=None, action='Add'):
"""creates an action item to add or remove tags. Action is either 'Add', 'Remove' or 'Set'
"""
if tags is None:
return
if not isinstance(tags, list):
tags = [tags]
items = []
for tag in tags:
items.append({
"code": tag,
"@type": "as.dto.tag.id.TagCode"
})
tagIds = {
"actions": [
{
"items": items,
"@type": "as.dto.common.update.ListUpdateAction{}".format(action.capitalize())
}
],
"@type": "as.dto.common.update.IdListUpdateValue"
}
def _list_update(ids=None, entity=None, action='Add'):
"""creates an action item to add, set or remove ids.
"""
if ids is None:
return
if not isinstance(ids, list):
ids = [ids]
items = []
for ids in ids:
items.append({
"code": ids,
"@type": "as.dto.{}.id.{}Code".format(entity.lower(), entity)
})
list_update = {
"actions": [
{
"items": items,
"@type": "as.dto.common.update.ListUpdateAction{}".format(action.capitalize())
}
],
"@type": "as.dto.common.update.IdListUpdateValue"
}
return list_update
def _create_typeId(type):
return {
"permId": type.upper(),
"@type": "as.dto.entitytype.id.EntityTypePermId"
}
def _create_projectId(ident):
match = re.match('/', ident)
if match:
return {
"identifier": ident,
"@type": "as.dto.project.id.ProjectIdentifier"
}
else:
return {
"permId": ident,
"@type": "as.dto.project.id.ProjectPermId"
}
def _create_experimentId(ident):
return {
"identifier": ident,
"@type": "as.dto.experiment.id.ExperimentIdentifier"
}
def _common_search(search_type, value, comparison="StringEqualToValue"):
sreq = {
"@type": search_type,
"fieldValue": {
"value": value,
"@type": "as.dto.common.search.{}".format(comparison)
}
}
return sreq
def _criteria_for_code(code):
return {
"fieldValue": {
"value": code.upper(),
"@type": "as.dto.common.search.StringEqualToValue"
},
"@type": "as.dto.common.search.CodeSearchCriteria"
}
def _subcriteria_for_type(code, entity):
return {
"@type": "as.dto.{}.search.{}TypeSearchCriteria".format(entity.lower(), entity),
"criteria": [
{
"@type": "as.dto.common.search.CodeSearchCriteria",
"fieldValue": {
"value": code.upper(),
"@type": "as.dto.common.search.StringEqualToValue"
}
}
}
def _subcriteria_for_status(status_value):
status_value = status_value.upper()
valid_status = "AVAILABLE LOCKED ARCHIVED UNARCHIVE_PENDING ARCHIVE_PENDING BACKUP_PENDING".split()
if not status_value in valid_status:
raise ValueError("status must be one of the following: " + ", ".join(valid_status))
return {
"@type": "as.dto.dataset.search.PhysicalDataSearchCriteria",
"operator": "AND",
"criteria": [{
"@type":
"as.dto.dataset.search.StatusSearchCriteria",
"fieldName": "status",
"fieldValue": status_value
sreq = {}
for key, val in req.items():
if key == "criteria":
items = []
for item in req['criteria']:
items.append(_gen_search_criteria(item))
sreq['criteria'] = items
elif key == "code":
sreq["criteria"] = [_common_search(
"as.dto.common.search.CodeSearchCriteria", val.upper()
)]
elif key == "identifier":
Loading
Loading full blame...