From ed30a77b3c688ce038bcac9f243ab9342ffcaed9 Mon Sep 17 00:00:00 2001
From: Henry Luetcke <hluetcke@ethz.ch>
Date: Fri, 2 Nov 2018 13:34:54 +0100
Subject: [PATCH] implement dataset methods

---
 object/OpenBis.m    |  96 +++++++++-
 pybis_example.ipynb | 444 +++++++-------------------------------------
 2 files changed, 162 insertions(+), 378 deletions(-)

diff --git a/object/OpenBis.m b/object/OpenBis.m
index 36a22e15521..6d76aeb332f 100644
--- a/object/OpenBis.m
+++ b/object/OpenBis.m
@@ -4,10 +4,10 @@ classdef OpenBis
     % and provides methods for interacting with the Python (pyBIS) Openbis object.
     %
     % Usage:
-    % Construct the MATLAB OpenBis object like this: 
+    % Construct the MATLAB OpenBis object like this:
     % obi = OpenBis()
-    % This will ask for URL, user name and password to connect to openBIS server. 
-    % These can also be provided as optional input arguments. 
+    % This will ask for URL, user name and password to connect to openBIS server.
+    % These can also be provided as optional input arguments.
     %
     % Methods are generally called like this:
     % spaces = obi.get_spaces()
@@ -186,6 +186,96 @@ classdef OpenBis
             project.save();
         end
         
+        %% Dataset methods
+        % This section defines the following MATLAB methods:
+        % get_datasets
+        % get_dataset
+        % get_dataset_files
+        % dataset_download
+        
+        function datasets = get_datasets(obj, varargin)
+            % GET_DATASETS Return table of matching datasets.
+            % Optional name-value inputs (char arrays, each defaulting to ''):
+            %   'code', 'type', 'experiment', 'project', 'tags'
+            % All five values, including untouched defaults, are forwarded
+            % as keyword arguments to the pyBIS get_datasets call.
+            
+            parser = inputParser;
+            parser.addRequired('obj');
+            % every search filter shares the same default ('') and validator
+            for filter_name = {'code', 'type', 'experiment', 'project', 'tags'}
+                parser.addParameter(filter_name{1}, '', @ischar);
+            end
+            parser.parse(obj, varargin{:});
+            opts = parser.Results;
+            
+            % query openBIS through the wrapped Python object
+            py_datasets = obj.pybis.get_datasets(pyargs( ...
+                'code', opts.code, 'type', opts.type, ...
+                'experiment', opts.experiment, ...
+                'project', opts.project, 'tags', opts.tags));
+            
+            % hand the .df property of the result to df_to_table
+            datasets = df_to_table(py_datasets.df);
+        end
+        
+        function dataset = get_dataset(obj, permid, varargin)
+            % GET_DATASET Fetch a single dataset via the pyBIS get_dataset call.
+            % permid: char array. Optional only_data: logical (default false).
+            % The result of the Python call is returned without df_to_table conversion.
+            parser = inputParser;
+            parser.addRequired('obj');
+            parser.addRequired('permid', @ischar);
+            parser.addOptional('only_data', false, @islogical);
+            parser.parse(obj, permid, varargin{:});
+            opts = parser.Results;
+            
+            dataset = obj.pybis.get_dataset(pyargs( ...
+                'permid', opts.permid, 'only_data', opts.only_data));
+        end
+        
+        
+        function files = get_dataset_files(obj, dataset, varargin)
+            % GET_DATASET_FILES List the files of a dataset.
+            % dataset: object whose get_files method is called (presumably the value returned by get_dataset - confirm).
+            % Optional start_folder: char array (default '/').
+            
+            parser = inputParser;
+            parser.addRequired('obj');
+            parser.addRequired('dataset');
+            parser.addOptional('start_folder', '/', @ischar);
+            parser.parse(obj, dataset, varargin{:});
+            opts = parser.Results;
+            
+            % ask the dataset for its file listing, then convert it with df_to_table
+            py_files = dataset.get_files(pyargs('start_folder', opts.start_folder));
+            files = df_to_table(py_files);
+        end
+        
+        
+        function path_to_file = dataset_download(obj, dataset, files, varargin)
+            % DATASET_DOWNLOAD Download files of a dataset via its download method.
+            % files: cell array of character vectors naming the files to fetch.
+            % Name-value options: 'destination' (char, default 'data'),
+            %   'wait_until_finished' (logical, default true), 'workers' (scalar, default 10).
+            % Returns fullfile(destination, dataset.char, files).
+            
+            parser = inputParser;
+            parser.addRequired('obj');
+            parser.addRequired('dataset');
+            parser.addRequired('files', @iscellstr);
+            parser.addParameter('destination', 'data', @ischar);
+            parser.addParameter('wait_until_finished', true, @islogical);
+            parser.addParameter('workers', 10, @isscalar);
+            parser.parse(obj, dataset, files, varargin{:});
+            opts = parser.Results;
+            
+            % workers is cast to int16 before being handed to the Python call
+            n_workers = int16(opts.workers);
+            dataset.download(pyargs('files', opts.files, 'destination', opts.destination, ...
+                'wait_until_finished', opts.wait_until_finished, 'workers', n_workers));
+            path_to_file = fullfile(opts.destination, dataset.char, opts.files);
+        end
         
     end
     
diff --git a/pybis_example.ipynb b/pybis_example.ipynb
index cc19cc56ca8..704d60d52dc 100644
--- a/pybis_example.ipynb
+++ b/pybis_example.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -21,7 +21,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -30,17 +30,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdin",
-     "output_type": "stream",
-     "text": [
-      " 路路路路路路路路路路路路路路路路路路路路路\n"
-     ]
-    }
-   ],
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "username = 'hluetcke'\n",
     "pw = getpass.getpass()"
@@ -48,7 +40,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -58,96 +50,27 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "'hluetcke-181026151925182x3063F8B1C01218403A7F2B2FFC989331'"
-      ]
-     },
-     "execution_count": 17,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "o.token"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "True"
-      ]
-     },
-     "execution_count": 18,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "o.is_session_active()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>code</th>\n",
-       "      <th>downloadUrl</th>\n",
-       "      <th>hostUrl</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>DSS1</td>\n",
-       "      <td>https://limb.ethz.ch:443/datastore_server</td>\n",
-       "      <td>https://limb.ethz.ch:443</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "   code                                downloadUrl                   hostUrl\n",
-       "0  DSS1  https://limb.ethz.ch:443/datastore_server  https://limb.ethz.ch:443"
-      ]
-     },
-     "execution_count": 9,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "o.get_datastores()"
    ]
@@ -161,120 +84,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>code</th>\n",
-       "      <th>description</th>\n",
-       "      <th>modificationDate</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>IHC</td>\n",
-       "      <td>Immunohistochemistry</td>\n",
-       "      <td>2010-10-01 15:27:15</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>LACZ</td>\n",
-       "      <td>Experiments with lacZ staining.</td>\n",
-       "      <td>2010-10-06 16:24:22</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>CULTURE</td>\n",
-       "      <td>Culture</td>\n",
-       "      <td>2010-08-19 08:49:11</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>ISH</td>\n",
-       "      <td>in situ hybridization</td>\n",
-       "      <td>2010-08-19 08:49:25</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>SKELETAL</td>\n",
-       "      <td>Skeletal</td>\n",
-       "      <td>2010-08-19 08:49:34</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>5</th>\n",
-       "      <td>METHODS</td>\n",
-       "      <td>Folder to store protocols</td>\n",
-       "      <td>2016-05-24 16:36:12</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>6</th>\n",
-       "      <td>PAPER</td>\n",
-       "      <td>Experiments from a paper.</td>\n",
-       "      <td>2010-11-12 12:08:37</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>7</th>\n",
-       "      <td>LIGHTSHEET</td>\n",
-       "      <td>Lightsheet imaging</td>\n",
-       "      <td>2018-05-31 13:29:04</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>8</th>\n",
-       "      <td>SAMPLE</td>\n",
-       "      <td>For Sample registration.</td>\n",
-       "      <td>2010-11-22 10:28:53</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>9</th>\n",
-       "      <td>MATERIALS</td>\n",
-       "      <td>Folder to store biological and chemical samples</td>\n",
-       "      <td>2016-05-24 16:35:51</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "    code        description                                      modificationDate\n",
-       "--  ----------  -----------------------------------------------  -------------------\n",
-       " 0  IHC         Immunohistochemistry                             2010-10-01 15:27:15\n",
-       " 1  LACZ        Experiments with lacZ staining.                  2010-10-06 16:24:22\n",
-       " 2  CULTURE     Culture                                          2010-08-19 08:49:11\n",
-       " 3  ISH         in situ hybridization                            2010-08-19 08:49:25\n",
-       " 4  SKELETAL    Skeletal                                         2010-08-19 08:49:34\n",
-       " 5  METHODS     Folder to store protocols                        2016-05-24 16:36:12\n",
-       " 6  PAPER       Experiments from a paper.                        2010-11-12 12:08:37\n",
-       " 7  LIGHTSHEET  Lightsheet imaging                               2018-05-31 13:29:04\n",
-       " 8  SAMPLE      For Sample registration.                         2010-11-22 10:28:53\n",
-       " 9  MATERIALS   Folder to store biological and chemical samples  2016-05-24 16:35:51"
-      ]
-     },
-     "execution_count": 8,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "o.get_experiment_types()"
    ]
@@ -328,7 +140,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### List and get datasets"
+    "### Samples / objects"
    ]
   },
   {
@@ -337,7 +149,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "datasets = o.get_datasets(type='HISTOLOGY')"
+    "o.get_sample_types()"
    ]
   },
   {
@@ -346,12 +158,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "counter = 0\n",
-    "for ds in datasets:\n",
-    "    print(ds)\n",
-    "    counter += 1\n",
-    "    if counter > 10:\n",
-    "        break"
+    "obj = o.new_object(type='UNKNOWN', space='MATLAB_TEST', code='12345')"
    ]
   },
   {
@@ -360,7 +167,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "ds = o.get_dataset('20101105142920015-6525')"
+    "obj.save()"
    ]
   },
   {
@@ -368,59 +175,54 @@
    "execution_count": null,
    "metadata": {},
    "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
    "source": [
-    "ds.get_files(start_folder='original')"
+    "### List and get datasets"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
-    "space = o.get_space('MATLAB_TEST')"
+    "datasets = o.get_datasets(type='HISTOLOGY')"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "\n",
-       "            <table border=\"1\" class=\"dataframe\">\n",
-       "            <thead>\n",
-       "                <tr style=\"text-align: right;\">\n",
-       "                <th>attribute</th>\n",
-       "                <th>value</th>\n",
-       "                </tr>\n",
-       "            </thead>\n",
-       "            <tbody>\n",
-       "        <tr> <td>code</td> <td>MATLAB_TEST</td> </tr><tr> <td>permId</td> <td>MATLAB_TEST</td> </tr><tr> <td>description</td> <td>test space for Matlab access to openBIS</td> </tr><tr> <td>registrator</td> <td>hluetcke</td> </tr><tr> <td>registrationDate</td> <td>2018-08-17 10:39:05</td> </tr><tr> <td>modificationDate</td> <td>2018-08-17 10:39:05</td> </tr>\n",
-       "            </tbody>\n",
-       "            </table>\n",
-       "        "
-      ],
-      "text/plain": [
-       "attribute         value\n",
-       "----------------  ---------------------------------------\n",
-       "code              MATLAB_TEST\n",
-       "permId            MATLAB_TEST\n",
-       "description       test space for Matlab access to openBIS\n",
-       "registrator       hluetcke\n",
-       "registrationDate  2018-08-17 10:39:05\n",
-       "modificationDate  2018-08-17 10:39:05"
-      ]
-     },
-     "execution_count": 12,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "space.delete"
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "counter = 0\n",
+    "for ds in datasets:\n",
+    "    print(ds)\n",
+    "    counter += 1\n",
+    "    if counter > 10:\n",
+    "        break"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ds = o.get_dataset('20101105142049776-6512')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ds.get_files(start_folder='original')"
    ]
   },
   {
@@ -436,7 +238,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "ds.download(files='original/441_x40001.tif', destination='data', wait_until_finished=True)"
+    "ds.download(files='', destination='data', wait_until_finished=True)"
    ]
   },
   {
@@ -477,72 +279,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 28,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>identifier</th>\n",
-       "      <th>permId</th>\n",
-       "      <th>leader</th>\n",
-       "      <th>registrator</th>\n",
-       "      <th>registrationDate</th>\n",
-       "      <th>modifier</th>\n",
-       "      <th>modificationDate</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>/MATLAB_TEST/TEST_PROJECT</td>\n",
-       "      <td>20180817104532621-9268</td>\n",
-       "      <td>None</td>\n",
-       "      <td>hluetcke</td>\n",
-       "      <td>2018-08-17 10:45:33</td>\n",
-       "      <td>hluetcke</td>\n",
-       "      <td>2018-08-17 10:45:33</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "    identifier                 permId                  leader    registrator    registrationDate     modifier    modificationDate\n",
-       "--  -------------------------  ----------------------  --------  -------------  -------------------  ----------  -------------------\n",
-       " 0  /MATLAB_TEST/TEST_PROJECT  20180817104532621-9268  None      hluetcke       2018-08-17 10:45:33  hluetcke    2018-08-17 10:45:33"
-      ]
-     },
-     "execution_count": 28,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "o.get_projects(space='MATLAB_TEST', code=None)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -551,70 +297,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 23,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Project successfully created.\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "\n",
-       "            <table border=\"1\" class=\"dataframe\">\n",
-       "            <thead>\n",
-       "                <tr style=\"text-align: right;\">\n",
-       "                <th>attribute</th>\n",
-       "                <th>value</th>\n",
-       "                </tr>\n",
-       "            </thead>\n",
-       "            <tbody>\n",
-       "        <tr> <td>code</td> <td>ANOTHER_TEST</td> </tr><tr> <td>description</td> <td>TGIF</td> </tr><tr> <td>permId</td> <td>20181026152148956-9272</td> </tr><tr> <td>identifier</td> <td>/MATLAB_TEST/ANOTHER_TEST</td> </tr><tr> <td>space</td> <td>MATLAB_TEST</td> </tr><tr> <td>leader</td> <td></td> </tr><tr> <td>registrator</td> <td>hluetcke</td> </tr><tr> <td>registrationDate</td> <td>2018-10-26 15:21:49</td> </tr><tr> <td>modifier</td> <td></td> </tr><tr> <td>modificationDate</td> <td>2018-10-26 15:21:49</td> </tr><tr><td>attachments</td><td></td></tr>\n",
-       "            </tbody>\n",
-       "            </table>\n",
-       "        "
-      ],
-      "text/plain": [
-       "attribute         value\n",
-       "----------------  -------------------------\n",
-       "code              ANOTHER_TEST\n",
-       "description       TGIF\n",
-       "permId            20181026152148956-9272\n",
-       "identifier        /MATLAB_TEST/ANOTHER_TEST\n",
-       "space             MATLAB_TEST\n",
-       "leader\n",
-       "registrator       hluetcke\n",
-       "registrationDate  2018-10-26 15:21:49\n",
-       "modifier\n",
-       "modificationDate  2018-10-26 15:21:49"
-      ]
-     },
-     "execution_count": 23,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "project.save()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 27,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Project 20181026152148956-9272 successfully deleted.\n"
-     ]
-    }
-   ],
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "project.delete('just a test')"
    ]
-- 
GitLab