Newer
Older
from tabulate import tabulate
from pandas import DataFrame, Series
import pandas as pd
class Things():
"""An object that contains a DataFrame object about an entity available in openBIS.
Use .df to work with the DataFrame instead.
Can be used in a for-loop:
for sample in openbis.get_samples():
...
You can access an element directly by providing the index number:
openbis.get_samples()[7]
Because the order of the elements cannot be ensured, you should choose the identifier instead:
openbis.get_samples()['/SOME_SPACE/SAMPLE_CODE']
vkovtun
committed
Of course, if you know the identifier already, you would rather do:
vkovtun
committed
self, openbis_obj, entity, df,
vkovtun
committed
identifier_name='code', additional_identifier=None,
start_with=None, count=None, totalCount=None,
vkovtun
committed
objects=None,
response=None
self.openbis = openbis_obj
self.entity = entity
self.df = df
self.identifier_name = identifier_name
self.additional_identifier = additional_identifier
self.totalCount=totalCount
self.single_item_method=single_item_method
vkovtun
committed
self.response = response
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
def __repr__(self):
return tabulate(self.df, headers=list(self.df))
def __len__(self):
return len(self.df)
def _repr_html_(self):
return self.df._repr_html_()
def get_parents(self, **kwargs):
if self.entity not in ['sample', 'dataset']:
raise ValueError("{}s do not have parents".format(self.entity))
if self.df is not None and len(self.df) > 0:
dfs = []
for ident in self.df[self.identifier_name]:
# get all objects that have this object as a child == parent
try:
parents = getattr(self.openbis, 'get_' + self.entity.lower() + 's')(withChildren=ident, **kwargs)
dfs.append(parents.df)
except ValueError:
pass
if len(dfs) > 0:
return Things(self.openbis, self.entity, pd.concat(dfs), self.identifier_name)
else:
return Things(self.openbis, self.entity, DataFrame(), self.identifier_name)
def get_children(self, **kwargs):
if self.entity not in ['sample', 'dataset']:
raise ValueError("{}s do not have children".format(self.entity))
if self.df is not None and len(self.df) > 0:
dfs = []
for ident in self.df[self.identifier_name]:
# get all objects that have this object as a child == parent
try:
parents = getattr(self.openbis, 'get_' + self.entity.lower() + 's')(withParent=ident, **kwargs)
dfs.append(parents.df)
except ValueError:
pass
if len(dfs) > 0:
return Things(self.openbis, self.entity, pd.concat(dfs), self.identifier_name)
else:
return Things(self.openbis, self.entity, DataFrame(), self.identifier_name)
def get_samples(self, **kwargs):
if self.entity not in ['space', 'project', 'experiment']:
raise ValueError("{}s do not have samples".format(self.entity))
if self.df is not None and len(self.df) > 0:
dfs = []
for ident in self.df[self.identifier_name]:
args = {}
args[self.entity.lower()] = ident
try:
samples = self.openbis.get_samples(**args, **kwargs)
dfs.append(samples.df)
except ValueError:
pass
if len(dfs) > 0:
return Things(self.openbis, 'sample', pd.concat(dfs), 'identifier')
else:
return Things(self.openbis, 'sample', DataFrame(), 'identifier')
get_objects = get_samples # Alias
def get_datasets(self, **kwargs):
if self.entity not in ['sample', 'experiment']:
raise ValueError("{}s do not have datasets".format(self.entity))
if self.df is not None and len(self.df) > 0:
dfs = []
for ident in self.df[self.identifier_name]:
args = {}
args[self.entity.lower()] = ident
try:
datasets = self.openbis.get_datasets(**args, **kwargs)
dfs.append(datasets.df)
except ValueError:
pass
if len(dfs) > 0:
return Things(self.openbis, 'dataset', pd.concat(dfs), 'permId')
else:
return Things(self.openbis, 'dataset', DataFrame(), 'permId')
def __getitem__(self, key):
""" elegant way to fetch a certain element from the displayed list.
If an integer value is given, we choose the row.
If the key is a list, we return the desired columns (normal dataframe behaviour)
If the key is a non-integer value, we treat it as a primary-key lookup
"""
if self.df is not None and len(self.df) > 0:
row = None
if isinstance(key, int):
if self.objects:
return self.objects[key]
else:
# get thing by rowid
row = self.df.loc[[key]]
elif isinstance(key, list):
# treat it as a normal dataframe
return self.df[key]
else:
# get thing by code
row = self.df[self.df[self.identifier_name] == key.upper()]
if row is not None:
# invoke the openbis.get_<entity>() method
if self.single_item_method:
get_item = self.single_item_method
else:
get_item = getattr(self.openbis, 'get_' + self.entity)
if self.additional_identifier is None:
return get_item(row[self.identifier_name].values[0])
## get an entry using two keys
else:
return get_item(
row[self.identifier_name].values[0],
row[self.additional_identifier].values[0]
)
def __iter__(self):
if self.objects:
for obj in self.objects:
yield obj
if self.single_item_method:
get_item = self.single_item_method
else:
get_item = getattr(self.openbis, 'get_' + self.entity)
for item in self.df[[self.identifier_name]][self.identifier_name].iteritems():
yield get_item(item[1])
#yield getattr(self.openbis, 'get_' + self.entity)(item[1])
# return self.df[[self.identifier_name]].to_dict()[self.identifier_name]