From ed30a77b3c688ce038bcac9f243ab9342ffcaed9 Mon Sep 17 00:00:00 2001 From: Henry Luetcke <hluetcke@ethz.ch> Date: Fri, 2 Nov 2018 13:34:54 +0100 Subject: [PATCH] implement dataset methods --- object/OpenBis.m | 96 +++++++++- pybis_example.ipynb | 444 +++++++------------------------------------- 2 files changed, 162 insertions(+), 378 deletions(-) diff --git a/object/OpenBis.m b/object/OpenBis.m index 36a22e15521..6d76aeb332f 100644 --- a/object/OpenBis.m +++ b/object/OpenBis.m @@ -4,10 +4,10 @@ classdef OpenBis % and provides methods for interacting with the Python (pyBIS) Openbis object. % % Usage: - % Construct the MATLAB OpenBis object like this: + % Construct the MATLAB OpenBis object like this: % obi = OpenBis() - % This will ask for URL, user name and password to connect to openBIS server. - % These can also be provided as optional input arguments. + % This will ask for URL, user name and password to connect to openBIS server. + % These can also be provided as optional input arguments. % % Methods are generally called like this: % spaces = obi.get_spaces() @@ -186,6 +186,96 @@ classdef OpenBis project.save(); end + %% Dataset methods + % this section defines following Matlab methods: + % get_datasets + % get_dataset + % get_dataset_files + % dataset_download + + function datasets = get_datasets(obj, varargin) + % Return table of matching datasets. + % Optional input arguments: + % code, type, experiment, project, tags + + defaultCode = ''; + defaultType = ''; + defaultExp = ''; + defaultProj = ''; + defaultTags = ''; + + p = inputParser; + addRequired(p, 'obj'); + addParameter(p, 'code', defaultCode, @ischar); + addParameter(p, 'type', defaultType, @ischar); + addParameter(p, 'experiment', defaultExp, @ischar); + addParameter(p, 'project', defaultProj, @ischar); + addParameter(p, 'tags', defaultTags, @ischar); + parse(p, obj, varargin{:}); + a = p.Results; + + datasets = obj.pybis.get_datasets(pyargs('code', a.code, 'type', a.type, 'experiment', a.experiment, ... + 'project', a.project, 'tags', a.tags)); + datasets = df_to_table(datasets.df); + end + + function dataset = get_dataset(obj, permid, varargin) + + only_data = false; + + p = inputParser; + addRequired(p, 'obj'); + addRequired(p, 'permid', @ischar); + addOptional(p, 'only_data', only_data, @islogical); + parse(p, obj, permid, varargin{:}); + a = p.Results; + + dataset = obj.pybis.get_dataset(pyargs('permid', a.permid, 'only_data', a.only_data)); + + end + + + function files = get_dataset_files(obj, dataset, varargin) + + start_folder = '/'; + + p = inputParser; + addRequired(p, 'obj'); + addRequired(p, 'dataset'); + addOptional(p, 'start_folder', start_folder, @ischar); + parse(p, obj, dataset, varargin{:}); + a = p.Results; + + files = dataset.get_files(pyargs('start_folder', a.start_folder)); + + files = df_to_table(files); + + end + + + function path_to_file = dataset_download(obj, dataset, files, varargin) + % provide files as cell array of files + + destination = 'data'; + wait_until_finished = true; + workers = 10; + + p = inputParser; + addRequired(p, 'obj'); + addRequired(p, 'dataset'); + addRequired(p, 'files', @iscellstr); + addParameter(p, 'destination', destination, @ischar); + addParameter(p, 'wait_until_finished', wait_until_finished, @islogical); + addParameter(p, 'workers', workers, @isscalar); + + parse(p, obj, dataset, files, varargin{:}); + a = p.Results; + + dataset.download(pyargs('files', a.files, 'destination', a.destination, 'wait_until_finished', a.wait_until_finished, 'workers', int16(a.workers))); + + path_to_file = fullfile(a.destination, dataset.char, a.files); + + end end diff --git a/pybis_example.ipynb b/pybis_example.ipynb index cc19cc56ca8..704d60d52dc 100644 --- a/pybis_example.ipynb +++ b/pybis_example.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -21,7 +21,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -30,17 +30,9 @@ }, { "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "name": "stdin", - "output_type": "stream", - "text": [ - " 路路路路路路路路路路路路路路路路路路路路路\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "username = 'hluetcke'\n", "pw = getpass.getpass()" @@ -48,7 +40,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -58,96 +50,27 @@ }, { "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'hluetcke-181026151925182x3063F8B1C01218403A7F2B2FFC989331'" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "o.token" ] }, { "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "o.is_session_active()" ] }, { "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>code</th>\n", - " <th>downloadUrl</th>\n", - " <th>hostUrl</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>DSS1</td>\n", - " <td>https://limb.ethz.ch:443/datastore_server</td>\n", - " <td>https://limb.ethz.ch:443</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - " code downloadUrl hostUrl\n", - "0 DSS1 https://limb.ethz.ch:443/datastore_server https://limb.ethz.ch:443" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "o.get_datastores()" ] @@ -161,120 +84,9 @@ }, { "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>code</th>\n", - " <th>description</th>\n", - " <th>modificationDate</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>IHC</td>\n", - " <td>Immunohistochemistry</td>\n", - " <td>2010-10-01 15:27:15</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1</th>\n", - " <td>LACZ</td>\n", - " <td>Experiments with lacZ staining.</td>\n", - " <td>2010-10-06 16:24:22</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2</th>\n", - " <td>CULTURE</td>\n", - " <td>Culture</td>\n", - " <td>2010-08-19 08:49:11</td>\n", - " </tr>\n", - " <tr>\n", - " <th>3</th>\n", - " <td>ISH</td>\n", - " <td>in situ hybridization</td>\n", - " <td>2010-08-19 08:49:25</td>\n", - " </tr>\n", - " <tr>\n", - " <th>4</th>\n", - " <td>SKELETAL</td>\n", - " <td>Skeletal</td>\n", - " <td>2010-08-19 08:49:34</td>\n", - " </tr>\n", - " <tr>\n", - " <th>5</th>\n", - " <td>METHODS</td>\n", - " <td>Folder to store protocols</td>\n", - " <td>2016-05-24 16:36:12</td>\n", - " </tr>\n", - " <tr>\n", - " <th>6</th>\n", - " <td>PAPER</td>\n", - " <td>Experiments from a paper.</td>\n", - " <td>2010-11-12 12:08:37</td>\n", - " </tr>\n", - " <tr>\n", - " <th>7</th>\n", - " <td>LIGHTSHEET</td>\n", - " <td>Lightsheet imaging</td>\n", - " <td>2018-05-31 13:29:04</td>\n", - " </tr>\n", - " <tr>\n", - " <th>8</th>\n", - " <td>SAMPLE</td>\n", - " <td>For Sample registration.</td>\n", - " <td>2010-11-22 10:28:53</td>\n", - " </tr>\n", - " <tr>\n", - " <th>9</th>\n", - " <td>MATERIALS</td>\n", - " <td>Folder to store biological and chemical samples</td>\n", - " <td>2016-05-24 16:35:51</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - " code description modificationDate\n", - "-- ---------- ----------------------------------------------- -------------------\n", - " 0 IHC Immunohistochemistry 2010-10-01 15:27:15\n", - " 1 LACZ Experiments with lacZ staining. 2010-10-06 16:24:22\n", - " 2 CULTURE Culture 2010-08-19 08:49:11\n", - " 3 ISH in situ hybridization 2010-08-19 08:49:25\n", - " 4 SKELETAL Skeletal 2010-08-19 08:49:34\n", - " 5 METHODS Folder to store protocols 2016-05-24 16:36:12\n", - " 6 PAPER Experiments from a paper. 2010-11-12 12:08:37\n", - " 7 LIGHTSHEET Lightsheet imaging 2018-05-31 13:29:04\n", - " 8 SAMPLE For Sample registration. 2010-11-22 10:28:53\n", - " 9 MATERIALS Folder to store biological and chemical samples 2016-05-24 16:35:51" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "o.get_experiment_types()" ] @@ -328,7 +140,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### List and get datasets" + "### Samples / objects" ] }, { @@ -337,7 +149,7 @@ "metadata": {}, "outputs": [], "source": [ - "datasets = o.get_datasets(type='HISTOLOGY')" + "o.get_sample_types()" ] }, { @@ -346,12 +158,7 @@ "metadata": {}, "outputs": [], "source": [ - "counter = 0\n", - "for ds in datasets:\n", - " print(ds)\n", - " counter += 1\n", - " if counter > 10:\n", - " break" + "obj = o.new_object(type='UNKNOWN', space='MATLAB_TEST', code='12345')" ] }, { @@ -360,7 +167,7 @@ "metadata": {}, "outputs": [], "source": [ - "ds = o.get_dataset('20101105142920015-6525')" + "obj.save()" ] }, { @@ -368,59 +175,54 @@ "execution_count": null, "metadata": {}, "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, "source": [ - "ds.get_files(start_folder='original')" + "### List and get datasets" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "space = o.get_space('MATLAB_TEST')" + "datasets = o.get_datasets(type='HISTOLOGY')" ] }, { "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " <table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th>attribute</th>\n", - " <th>value</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr> <td>code</td> <td>MATLAB_TEST</td> </tr><tr> <td>permId</td> <td>MATLAB_TEST</td> </tr><tr> <td>description</td> <td>test space for Matlab access to openBIS</td> </tr><tr> <td>registrator</td> <td>hluetcke</td> </tr><tr> <td>registrationDate</td> <td>2018-08-17 10:39:05</td> </tr><tr> <td>modificationDate</td> <td>2018-08-17 10:39:05</td> </tr>\n", - " </tbody>\n", - " </table>\n", - " " - ], - "text/plain": [ - "attribute value\n", - "---------------- ---------------------------------------\n", - "code MATLAB_TEST\n", - "permId MATLAB_TEST\n", - "description test space for Matlab access to openBIS\n", - "registrator hluetcke\n", - "registrationDate 2018-08-17 10:39:05\n", - "modificationDate 2018-08-17 10:39:05" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "space.delete" + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "counter = 0\n", + "for ds in datasets:\n", + " print(ds)\n", + " counter += 1\n", + " if counter > 10:\n", + " break" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ds = o.get_dataset('20101105142049776-6512')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ds.get_files(start_folder='original')" ] }, { @@ -436,7 +238,7 @@ "metadata": {}, "outputs": [], "source": [ - "ds.download(files='original/441_x40001.tif', destination='data', wait_until_finished=True)" + "ds.download(files='', destination='data', wait_until_finished=True)" ] }, { @@ -477,72 +279,16 @@ }, { "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>identifier</th>\n", - " <th>permId</th>\n", - " <th>leader</th>\n", - " <th>registrator</th>\n", - " <th>registrationDate</th>\n", - " <th>modifier</th>\n", - " <th>modificationDate</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>/MATLAB_TEST/TEST_PROJECT</td>\n", - " <td>20180817104532621-9268</td>\n", - " <td>None</td>\n", - " <td>hluetcke</td>\n", - " <td>2018-08-17 10:45:33</td>\n", - " <td>hluetcke</td>\n", - " <td>2018-08-17 10:45:33</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - " identifier permId leader registrator registrationDate modifier modificationDate\n", - "-- ------------------------- ---------------------- -------- ------------- ------------------- ---------- -------------------\n", - " 0 /MATLAB_TEST/TEST_PROJECT 20180817104532621-9268 None hluetcke 2018-08-17 10:45:33 hluetcke 2018-08-17 10:45:33" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "o.get_projects(space='MATLAB_TEST', code=None)" ] }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -551,70 +297,18 @@ }, { "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Project successfully created.\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - " <table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th>attribute</th>\n", - " <th>value</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr> <td>code</td> <td>ANOTHER_TEST</td> </tr><tr> <td>description</td> <td>TGIF</td> </tr><tr> <td>permId</td> <td>20181026152148956-9272</td> </tr><tr> <td>identifier</td> <td>/MATLAB_TEST/ANOTHER_TEST</td> </tr><tr> <td>space</td> <td>MATLAB_TEST</td> </tr><tr> <td>leader</td> <td></td> </tr><tr> <td>registrator</td> <td>hluetcke</td> </tr><tr> <td>registrationDate</td> <td>2018-10-26 15:21:49</td> </tr><tr> <td>modifier</td> <td></td> </tr><tr> <td>modificationDate</td> <td>2018-10-26 15:21:49</td> </tr><tr><td>attachments</td><td></td></tr>\n", - " </tbody>\n", - " </table>\n", - " " - ], - "text/plain": [ - "attribute value\n", - "---------------- -------------------------\n", - "code ANOTHER_TEST\n", - "description TGIF\n", - "permId 20181026152148956-9272\n", - "identifier /MATLAB_TEST/ANOTHER_TEST\n", - "space MATLAB_TEST\n", - "leader\n", - "registrator hluetcke\n", - "registrationDate 2018-10-26 15:21:49\n", - "modifier\n", - "modificationDate 2018-10-26 15:21:49" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "project.save()" ] }, { "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Project 20181026152148956-9272 successfully deleted.\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "project.delete('just a test')" ] -- GitLab