From 3896e2c22106139a341e97275731bfb0ad8183f3 Mon Sep 17 00:00:00 2001 From: alaskowski <alaskowski@ethz.ch> Date: Thu, 24 Aug 2023 16:08:19 +0200 Subject: [PATCH] SSDM-13820: Updated pybis docs --- .../src/python/README.md | 187 +++++++++++------- 1 file changed, 119 insertions(+), 68 deletions(-) diff --git a/api-openbis-python3-pybis/src/python/README.md b/api-openbis-python3-pybis/src/python/README.md index 20ba1f6a2f7..6fdfba3d6f8 100644 --- a/api-openbis-python3-pybis/src/python/README.md +++ b/api-openbis-python3-pybis/src/python/README.md @@ -11,7 +11,7 @@ pyBIS is a Python module for interacting with openBIS. pyBIS is designed to be m ## Installation -``` +```commandline pip install --upgrade pybis ``` @@ -19,7 +19,7 @@ That command will download install pyBIS and all its dependencies. If pyBIS is a If you haven't done yet, install Jupyter and/or Jupyter Lab (the next Generation of Jupyter): -``` +```commandline pip install jupyter pip install jupyterlab ``` @@ -190,7 +190,7 @@ https://osxfuse.github.io **Unix Cent OS 7** -``` +```commandline $ sudo yum install epel-release $ sudo yum --enablerepo=epel -y install fuse-sshfs $ user="$(whoami)" @@ -203,7 +203,7 @@ After the installation, an `sshfs` command should be available. Because the mount/unmount procedure differs from platform to platform, pyBIS offers two simple methods: -``` +```python o.mount() o.mount(username, password, hostname, mountpoint, volname) o.is_mounted() @@ -232,7 +232,7 @@ OpenBIS stores quite a lot of meta-data along with your dataSets. 
The collection ## browse masterdata -``` +```python sample_types = o.get_sample_types() # get a list of sample types sample_types.df # DataFrame object st = o.get_sample_types()[3] # get 4th element of that list @@ -269,7 +269,7 @@ o.get_tags() The first step in creating a new entity type is to create a so called **property type**: -``` +```python pt_text = o.new_property_type( code = 'MY_NEW_PROPERTY_TYPE', label = 'yet another property type', @@ -340,7 +340,7 @@ The second step (after creating a property type, see above) is to create the **s - `new_sample_type()` == `new_object_type()` -``` +```python sample_type = o.new_sample_type( code = 'my_own_sample_type', # mandatory generatedCodePrefix = 'S', # mandatory @@ -358,7 +358,7 @@ sample_type.save() When `autoGeneratedCode` attribute is set to `True`, then you don't need to provide a value for `code` when you create a new sample. You can get the next autoGeneratedCode like this: -``` +```python sample_type.get_next_sequence() # eg. 67 sample_type.get_next_code() # e.g. FLY77 ``` @@ -369,11 +369,11 @@ From pyBIS 1.31.0 onwards, you can provide a `code` even for samples where its s The third step, after saving the sample type, is to **assign or revoke properties** to the newly created sample type. This assignment procedure applies to all entity types (dataset type, experiment type). 
-``` +```python sample_type.assign_property( - prop = 'diff_time', # mandatory - section = '', - ordinal = 5, + prop = 'diff_time', # Mandatory value + section = '', # Name of the section + ordinal = 1, # Position of property mandatory = True, initialValueForExistingEntities = 'initial value' showInEditView = True, @@ -383,11 +383,18 @@ sample_type.revoke_property('diff_time') sample_type.get_property_assignments() ``` +***⚠️ Note: ordinal position*** + +If a new property is assigned to the position of an existing property, the ordinal value of the existing property assignment is increased by 1. + + + + ## create a dataset type The second step (after creating a **property type**, see above) is to create the **dataset type**. The third step is to **assign or revoke the properties** to the newly created dataset type. -``` +```python dataset_type = o.new_dataset_type( code = 'my_dataset_type', # mandatory description = None, @@ -410,7 +417,7 @@ The new name for **experiment** is **collection**. You can use both methods inte - `new_experiment_type()` == `new_collection_type()` -``` +```python experiment_type = o.new_experiment_type( code, description = None, @@ -426,7 +433,7 @@ experiment_type.get_property_assignments() Materials and material types are deprecated in newer versions of openBIS. -``` +```python material_type = o.new_material_type( code, description=None, @@ -443,7 +450,7 @@ material_type.get_property_assignments() Plugins are Jython scripts that can accomplish more complex data-checks than ordinary types and vocabularies can achieve. They are assigned to entity types (dataset type, sample type etc). 
[Documentation and examples can be found here](https://wiki-bsse.ethz.ch/display/openBISDoc/Properties+Handled+By+Scripts) -``` +```python pl = o.new_plugin( name ='my_new_entry_validation_plugin', pluginType ='ENTITY_VALIDATION', # or 'DYNAMIC_PROPERTY' or 'MANAGED_PROPERTY', @@ -463,7 +470,7 @@ Users can only login into the openBIS system when: - the user is already added to the openBIS user list (see below) - the user is assigned a role which allows a login, either directly assigned or indirectly assigned via a group membership -``` +```python o.get_groups() group = o.new_group(code='group_name', description='...') group = o.get_group('group_name') @@ -506,7 +513,7 @@ Spaces are fundamental way in openBIS to divide access between groups. Within a - sample / object - dataset -``` +```python space = o.new_space(code='space_name', description='') space.save() o.get_spaces( @@ -543,7 +550,7 @@ Projects live within spaces and usually contain experiments (aka collections): - sample / object - dataset -``` +```python project = o.new_project( space = space, code = 'project_name', @@ -607,7 +614,7 @@ The new name for **experiment** is **collection**. 
You can use boths names inter ### create a new experiment -``` +```python exp = o.new_experiment code='MY_NEW_EXPERIMENT', type='DEFAULT_EXPERIMENT', @@ -619,7 +626,7 @@ exp.save() ### search for experiments -``` +```python experiments = o.get_experiments( project = 'YEASTS', space = 'MY_SPACE', @@ -645,7 +652,7 @@ However, if you want to include specific attributes in the results, you can do s The `get_experiments()` method results include only `identifier`, `permId`, `type`, `registrator`, `registrationDate`, `modifier`, `modificationDate` -```get attributes +```python experiments = o.get_experiments( project = 'YEASTS', space = 'MY_SPACE', @@ -661,7 +668,7 @@ experiments = o.get_experiments( ### Experiment attributes -``` +```python exp.attrs.all() # returns all attributes as a dict exp.attrs.tags = ['some', 'tags'] @@ -686,7 +693,7 @@ exp.save() # needed to save/update the changed attribute **Getting properties** -``` +```python experiment.props == ds.p # you can use either .props or .p to access the properties experiment.p # in Jupyter: show all properties in a nice table experiment.p() # get all properties as a dict @@ -731,7 +738,7 @@ The new name for **sample** is **object**. You can use boths names interchangeab etc. -``` +```python sample = o.new_sample( type = 'YEAST', space = 'MY_SPACE', @@ -784,7 +791,7 @@ Creating a single sample takes some time. If you need to create many samples, yo **create many samples in one transaction** -``` +```python trans = o.new_transaction() for i in range (0, 100): sample = o.new_sample(...) 
@@ -795,7 +802,7 @@ trans.commit() **update many samples in one transaction** -``` +```python trans = o.new_transaction() for sample in o.get_samples(count=100): sample.prop.some_property = 'different value' @@ -806,7 +813,7 @@ trans.commit() **delete many samples in one transaction** -``` +```python trans = o.new_transaction() for sample in o.get_samples(count=100): sample.mark_to_be_deleted() @@ -820,7 +827,7 @@ trans.commit() ### parents, children, components and container -``` +```python sample.get_parents() sample.set_parents(['/MY_SPACE/PARENT_SAMPLE_NAME') sample.add_parents('/MY_SPACE/PARENT_SAMPLE_NAME') @@ -850,7 +857,7 @@ sample.del_components('/MY_SPACE/COMPONENT_NAME') ### sample tags -``` +```python sample.get_tags() sample.set_tags('tag1') sample.add_tags(['tag2','tag3']) @@ -861,7 +868,7 @@ sample.del_tags('tag1') **Getting properties** -``` +```python sample.attrs.all() # returns all attributes as a dict sample.attribute_name # return the attribute value @@ -876,7 +883,7 @@ sample.p['property'] # get the value of a property **Setting properties** -``` +```python sample.experiment = 'first_exp' # assign sample to an experiment sample.project = 'my_project' # assign sample to a project @@ -896,7 +903,7 @@ sample.save() # needed to save/update the attributes and The result of a search is always list, even when no items are found. The `.df` attribute returns the Pandas dataFrame of the results. -``` +```python samples = o.get_samples( space ='MY_SPACE', type ='YEAST', @@ -940,13 +947,13 @@ samples = o.get_samples(props="*") # retrieve all properties of all samples ***Note: Attributes download*** -The `get_samples()` method, by default, returns fewer details to make the download process faster. +The `get_samples()` method, by default, returns fewer attributes to make the download process faster. However, if you want to include specific attributes in the results, you can do so by using the `attrs` parameter. 
The `get_samples()` method results include only `identifier`, `permId`, `type`, `registrator`, `registrationDate`, `modifier`, `modificationDate` -```get attributes -experiments = o.get_samples( +```python +samples = o.get_samples( space = 'MY_SPACE', type = 'YEAST', attrs = ["parents", "children"] @@ -958,9 +965,53 @@ experiments = o.get_samples( ``` -### freezing samples +**⚠️ Clarification** + +- The `get_samples()` method always downloads object properties +- Attributes that were not downloaded (e.g. `parents`, `children`) will not be removed upon `save()` unless explicitly done by the user. +- `None` values of list attributes are ignored during the saving process + +**Example:** +```python +# get sample with get_sample() method +sample = o.get_sample('/DEFAULT/DEFAULT/EXP2') +sample + +Out[1]: +attribute value +------------------- ------------------------------ +code EXP2 +permId 20230823205338303-49 +identifier /DEFAULT/DEFAULT/EXP2 +type EXPERIMENTAL_STEP +project /DEFAULT/DEFAULT +parents [] # empty list +children ['/DEFAULT/DEFAULT/EXP3'] +components [] +``` + +```python +# get sample with get_samples() method +samples = o.get_samples(identifier='/DEFAULT/DEFAULT/EXP2') +samples[0] + +Out[1]: +attribute value +------------------- ------------------------------ +code EXP2 +permId 20230823205338303-49 +identifier /DEFAULT/DEFAULT/EXP2 +type EXPERIMENTAL_STEP +project /DEFAULT/DEFAULT +parents # None value +children # None value +components [] ``` + +### freezing samples + +```python sample.freeze = True sample.freezeForComponents = True sample.freezeForChildren = True @@ -989,7 +1040,7 @@ This example does the following - print the list of all files in this dataset - download the dataset -``` +```python datasets = sample.get_datasets(type='SCANS', start_with=0, count=10) for dataset in datasets: print(dataset.props()) @@ -1006,8 +1057,8 @@ However, if you want to include specific attributes in the results, you can do s The `get_datasets()` method results include only 
`permId`, `type`, `experiment`, `sample`, `registrationDate`, `modificationDate`, `location`, `status`, `presentInArchive`, `size` -```get attributes -experiments = o.get_datasets( +```python +datasets = o.get_datasets( space = 'MY_SPACE', attrs = ["parents", "children"] ) @@ -1020,7 +1071,7 @@ experiments = o.get_datasets( **More dataset functions:** -``` +```python ds = o.get_dataset('20160719143426517-259') ds.get_parents() ds.get_children() @@ -1048,7 +1099,7 @@ ds.download_attachments(<path or cwd>) # Deprecated, as attachments are not com ### download dataSets -``` +```python o.download_prefix # used for download() and symlink() method. # Is set to data/hostname by default, but can be changed. ds.get_files(start_folder="/") # get file list as Pandas dataFrame @@ -1070,7 +1121,7 @@ ds.is_physical() # TRUE if dataset is physically Instead of downloading a dataSet, you can create a symbolic link to a dataSet in the openBIS dataStore. To do that, the openBIS dataStore needs to be mounted first (see mount method above). **Note:** Symbolic links and the mount() feature currently do not work with Windows. -``` +```python o.download_prefix # used for download() and symlink() method. # Is set to data/hostname by default, but can be changed. 
ds.symlink() # creates a symlink for this dataset: data/hostname/permId @@ -1087,7 +1138,7 @@ ds.is_symlink() **Getting properties** -``` +```python ds.attrs.all() # returns all attributes as a dict ds.attribute_name # return the attribute value @@ -1102,7 +1153,7 @@ ds.p['property'] # get the value of a property **Setting properties** -``` +```python ds.experiment = 'first_exp' # assign dataset to an experiment ds.sample = 'my_sample' # assign dataset to a sample @@ -1120,7 +1171,7 @@ ds.set_props({ key: value }) # set the values of some properties - The result of a search is always list, even when no items are found - The `.df` attribute returns the Pandas dataFrame of the results -``` +```python datasets = o.get_datasets( type ='MY_DATASET_TYPE', **{ "SOME.WEIRD:PROP": "value"}, # property name contains a dot or a @@ -1155,7 +1206,7 @@ df = datasets.df # returns a Pandas dataFrame object of the In some cases, you might want to retrieve precisely certain datasets. This can be achieved by methods chaining (but be aware, it might not be very performant): -``` +```python datasets = o.get_experiments(project='YEASTS')\ .get_samples(type='FLY')\ .get_datasets( @@ -1167,7 +1218,7 @@ datasets = o.get_experiments(project='YEASTS')\ - another example: -``` +```python datasets = o.get_experiment('/MY_NEW_SPACE/MY_PROJECT/MY_EXPERIMENT4')\ .get_samples(type='UNKNOWN')\ .get_parents()\ @@ -1179,7 +1230,7 @@ datasets = o.get_experiment('/MY_NEW_SPACE/MY_PROJECT/MY_EXPERIMENT4')\ - once a dataSet has been frozen, it cannot be changed by anyone anymore - so be careful! 
-``` +```python ds.freeze = True ds.freezeForChildren = True ds.freezeForParents = True @@ -1190,7 +1241,7 @@ ds.save() ### create a new dataSet -``` +```python ds_new = o.new_dataset( type = 'ANALYZED_DATA', experiment = '/SPACE/PROJECT/EXP1', @@ -1234,7 +1285,7 @@ ds_new.save() | `../../myData/` | `myData/` | | `some/experiment/results/` | `results/` | -``` +```python ds_new = o.new_dataset( type = 'RAW_DATA', sample = '/SPACE/SAMP1', @@ -1247,7 +1298,7 @@ ds_new.save() A DataSet of kind=CONTAINER contains other DataSets, but no files: -``` +```python ds_new = o.new_dataset( type = 'ANALYZED_DATA', experiment = '/SPACE/PROJECT/EXP1', @@ -1260,7 +1311,7 @@ ds_new.save() ### get, set, add and remove parent datasets -``` +```python dataset.get_parents() dataset.set_parents(['20170115220259155-412']) dataset.add_parents(['20170115220259155-412']) @@ -1269,7 +1320,7 @@ dataset.del_parents(['20170115220259155-412']) #### get, set, add and remove child datasets -``` +```python dataset.get_children() dataset.set_children(['20170115220259155-412']) dataset.add_children(['20170115220259155-412']) @@ -1282,7 +1333,7 @@ dataset.del_children(['20170115220259155-412']) - As opposed to Samples, DataSets may belong (contained) to more than one DataSet-container - caveat: containers are NOT compatible with ELN-LIMS -``` +```python dataset.get_containers() dataset.set_containers(['20170115220259155-412']) dataset.add_containers(['20170115220259155-412']) @@ -1294,7 +1345,7 @@ dataset.del_containers(['20170115220259155-412']) - you may also use the xxx_contained() functions, which are just aliases. 
- caveat: components are NOT compatible with ELN-LIMS -``` +```python dataset.get_components() dataset.set_components(['20170115220259155-412']) dataset.add_components(['20170115220259155-412']) @@ -1305,7 +1356,7 @@ dataset.del_components(['20170115220259155-412']) create semantic annotation for sample type 'UNKNOWN': -``` +```python sa = o.new_semantic_annotation( entityType = 'UNKNOWN', @@ -1321,14 +1372,14 @@ sa.save() Create semantic annotation for property type (predicate and descriptor values omitted for brevity) -``` +```python sa = o.new_semantic_annotation(propertyType = 'DESCRIPTION', ...) sa.save() ``` **Create** semantic annotation for sample property assignment (predicate and descriptor values omitted for brevity) -``` +```python sa = o.new_semantic_annotation( entityType = 'UNKNOWN', propertyType = 'DESCRIPTION', @@ -1339,26 +1390,26 @@ sa.save() **Create** a semantic annotation directly from a sample type. Will also create sample property assignment annotations when propertyType is given: -``` +```python st = o.get_sample_type("ORDER") st.new_semantic_annotation(...) 
``` **Get all** semantic annotations -``` +```python o.get_semantic_annotations() ``` **Get** semantic annotation by perm id -``` +```python sa = o.get_semantic_annotation("20171015135637955-30") ``` **Update** semantic annotation -``` +```python sa.predicateOntologyId = 'new_po_id' sa.descriptorOntologyId = 'new_do_id' sa.save() @@ -1366,13 +1417,13 @@ sa.save() **Delete** semantic annotation -``` +```python sa.delete('reason') ``` ## Tags -``` +```python new_tag = o.new_tag( code = 'my_tag', description = 'some descriptive text' @@ -1410,7 +1461,7 @@ So for example, you want to add a property called **Animal** to a Sample and you **create new Vocabulary with three VocabularyTerms** -``` +```python voc = o.new_vocabulary( code = 'BBB', description = 'description of vocabulary aaa', @@ -1430,7 +1481,7 @@ voc.save() # update **create additional VocabularyTerms** -``` +```python term = o.new_term( code='TERM_CODE_XXX', vocabularyCode='BBB', @@ -1444,7 +1495,7 @@ term.save() To change the ordinal of a term, it has to be moved either to the top with the `.move_to_top()` method or after another term using the `.move_after_term('TERM_BEFORE')` method. -``` +```python voc = o.get_vocabulary('STORAGE') term = voc.get_terms()['RT'] term.label = "Room Temperature" -- GitLab