From 8fd4129ee9b3b80c041233cbdd3f232a3904012a Mon Sep 17 00:00:00 2001
From: tpylak <tpylak>
Date: Tue, 1 Feb 2011 22:35:16 +0000
Subject: [PATCH] LMS-1992 template dropbox: add documentation

SVN: 19688
---
 screening/etc/data-set-handler.py | 362 +++++++++++++++++++-----------
 1 file changed, 236 insertions(+), 126 deletions(-)

diff --git a/screening/etc/data-set-handler.py b/screening/etc/data-set-handler.py
index 9bef1461d4d..89e2a271994 100755
--- a/screening/etc/data-set-handler.py
+++ b/screening/etc/data-set-handler.py
@@ -12,36 +12,194 @@ from ch.systemsx.cisd.openbis.dss.etl.dto.api.v1 import *
 from ch.systemsx.cisd.openbis.dss.etl.custom.geexplorer import GEExplorerImageAnalysisResultParser
 from java.io import File
 
+# ------------
+# Generic utility
+# ------------
+
+""" 
+Finds the first occurrence of the pattern when searching from the right.
+Raises an exception if the pattern cannot be found.
+"""
+def rfind(text, pattern):
+    ix = text.rfind(pattern)
+    ensurePatternFound(ix, text, pattern)
+    return ix
+
+""" 
+Finds the first occurrence of the pattern when searching from the left.
+Raises an exception if the pattern cannot be found.
+"""
+def find(text, pattern):
+    ix = text.find(pattern)
+    ensurePatternFound(ix, text, pattern)
+    return ix
+
+def ensurePatternFound(ix, file, pattern):
+    if ix == -1:
+        raise Exception("Cannot find '" + pattern + "' pattern in file name '" + file + "'")    
+
+""" Returns: name of the file without the extension """
+def extract_file_basename(filename):
+    lastDot = filename.rfind(".")
+    if lastDot != -1:
+        return filename[0:lastDot]
+    else:
+        return filename
+
+""" Returns: lower-cased extension of the file, without the leading dot """
+def get_file_ext(file):
+    return os.path.splitext(file)[1][1:].lower()
+
+""" Returns: java.io.File - first file with the specified extension (case-insensitive), or None if no file matches or incoming_file is not a directory """
+def find_file_by_ext(incoming_file, expected_ext):
+    if not incoming_file.isDirectory():
+        return None
+    incoming_path = incoming_file.getPath()
+    for file in os.listdir(incoming_path):
+        ext = get_file_ext(file)
+        if ext.upper() == expected_ext.upper():
+            return File(incoming_path, file)
+    return None
+
+""" Returns: java.io.File - first entry of incoming_file (presumably a subdirectory) whose name contains the specified marker (case-insensitive), or None if there is none """
+def find_dir(incoming_file, dir_name_marker):
+    if not incoming_file.isDirectory():
+        return None
+    incoming_path = incoming_file.getPath()
+    for file in os.listdir(incoming_path):
+        if dir_name_marker.upper() in file.upper():
+            return File(incoming_path, file)
+    return None
+
+def get_random_string():
+    return str(int(random.random()*1000000000))
+
+""" 
+Creates a temporary directory inside the 'tmp' directory located two levels above the specified incoming file.
+The name of the directory will contain the specified label and a random text. 
+Returns:
+    java.io.File - path to the temporary directory
+"""
+def get_tmp_dir(incoming, label):
+    dropbox_parent_dir = incoming.getParentFile().getParent()
+    tmp_dir = File(dropbox_parent_dir, "tmp")
+    if not os.path.exists(tmp_dir.getPath()):
+        os.mkdir(tmp_dir.getPath())
+    tmp_labeled_dir = File(tmp_dir, label + ".tmp." + get_random_string())
+    os.mkdir(tmp_labeled_dir.getPath())
+    return tmp_labeled_dir
+
+# ------------
+# Generic dataset registration
+# ------------
+
+""" Basic attributes of a dataset connected to a sample and optionally to one parent dataset """
+class Dataset:
+    # code of the dataset type and file format for the new dataset
+    dataset_type = ""
+    file_format = ""
+    # attributes of the connected sample
+    sample_code = ""
+    sample_space = ""
+    # attributes of the parent dataset - optional
+    parent_dataset_code = ""
+    # boolean which marks whether the dataset is measured (True) or derived from measured data (False)
+    is_measured = True
+
+"""
+Sets registration details using the specified Dataset.
+Parameters:
+   dataset - basic information about the dataset (see Dataset type)
+   registration_details - DataSetRegistrationDetails which will be filled (see javadoc)
+"""
+def set_dataset_details(dataset, registration_details):
+    data_set_info = registration_details.getDataSetInformation()
+    data_set_info.setSpaceCode(dataset.sample_space)
+    data_set_info.setSampleCode(dataset.sample_code)
+    if (dataset.parent_dataset_code):
+        data_set_info.setParentDataSetCodes([dataset.parent_dataset_code])
+    registration_details.setFileFormatType(FileFormatType(dataset.file_format));
+    registration_details.setDataSetType(DataSetType(dataset.dataset_type));
+    registration_details.setMeasuredData(dataset.is_measured);
+
+# ------------
+# Image dataset registration
+# ------------
+
+"""
+Extends basic information about dataset with image dataset specific data.
+"""
+class ImageDataset(Dataset): 
+    # number of rows and columns in the matrix of tiles in the well
+    tile_rows_number = -1
+    tile_columns_number = -1
+    # list of ImageFileInfo objects. Describes each image in the dataset.
+    images = None
+    # list of Channel objects. Describes all channels of the dataset.
+    channels = None
+
+"""
+Auxiliary function to extract all channel codes used by specified images.
+The channel label will be equal to channel code.
+Parameters:
+    images - list of ImageFileInfo
+Returns: 
+    list of Channel
+"""
+def get_available_channels(images):
+    channel_codes = {}
+    for image in images:
+        channel_codes[image.getChannelCode()] = 1
+    channels = []
+    for channelCode in channel_codes.keys():
+        channels.append(Channel(channelCode, channelCode))
+    return channels
+
+"""
+Converts ImageDataset to DataSetRegistrationDetails
+Parameters:
+    image_dataset - ImageDataset
+Returns: 
+    DataSetRegistrationDetails
+"""
+def create_image_dataset_details(image_dataset):
+    registration_details = factory.createImageRegistrationDetails()
+    
+    set_dataset_details(image_dataset, registration_details)
+
+    data_set_info = registration_details.getDataSetInformation()
+    data_set_info.setTileGeometry(image_dataset.tile_rows_number, image_dataset.tile_columns_number)
+    data_set_info.setImages(image_dataset.images)
+    data_set_info.setChannels(image_dataset.channels)
+    return registration_details
+    
 # ------------
 # Dropbox specific image dataset registration. You may want to modify this part.
 # ------------
 
 """ type of the new image dataset """
 IMAGE_DATASET_TYPE = "HCS_IMAGE"
-""" file format of files in a new image dataset """
+""" file format code of files in a new image dataset """
 IMAGE_FILE_FORMAT = "TIFF"
+
 """ type of the new analysis dataset """
 ANALYSIS_DATASET_TYPE = "HCS_IMAGE_ANALYSIS_DATA"
-""" file format of the analysis dataset file """
+""" file format of the analysis dataset """
 ANALYSIS_FILE_FORMAT = "CSV"
 
+""" type of the new image overlay dataset """
 OVERLAY_IMAGE_DATASET_TYPE = "HCS_IMAGE_SEGMENTATION_OVERLAY"
+""" file format of the image overlay dataset """
 OVERLAY_IMAGE_FILE_FORMAT = "PNG"
 
 """ space where the plate for which the dataset has been acquired exist """
 PLATE_SPACE = "DEMO"
 
+""" only files with these extensions will be recognized as images """
 RECOGNIZED_IMAGES_EXTENSIONS = ["tiff", "tif", "png", "gif", "jpg", "jpeg"]
 
 
 # ---------
-""" sample type code of the plate, needed if a new sample is registered automatically """
-PLATE_TYPE_CODE = "PLATE"
-""" project and experiment where new plates will be registered """
-DEFAULT_PROJECT_CODE = "TEST"
-DEFAULT_EXPERIMENT_CODE = "SANOFI"
-PLATE_GEOMETRY_PROPERTY_CODE = "$PLATE_GEOMETRY"
-PLATE_GEOMETRY = "384_WELLS_16X24"
 
 """ extracts code of the sample from the directory name """
 def extract_sample_code(incoming_name):
@@ -52,19 +210,26 @@ def extract_sample_code(incoming_name):
         code = file_basename
     return code
 
+""" 
+For a given tile number and tile geometry returns a 1-based (row, column) tuple which describes where
+the tile is located in the well.
+"""
 def get_tile_coords(tile_num, tile_geometry):
     columns = tile_geometry[1]
     row = ((tile_num - 1) / columns) + 1
     col = ((tile_num - 1) % columns) + 1
     return (row, col)
 
+""" Returns: integer - maximal tile number """
 def get_max_tile_number(image_tokens_list):
     max_tile = 0
     for image_tokens in image_tokens_list:
         max_tile = max(max_tile, image_tokens.tile)
     return max_tile
 
-""" returns (rows, columns) """
+""" 
+Returns: (rows, columns) tuple describing the matrix of tiles (aka fields or sites) in the well
+"""
 def get_tile_geometry(image_tokens_list):
     max_tile = get_max_tile_number(image_tokens_list)
     if max_tile % 4 == 0 and max_tile != 4:
@@ -76,15 +241,22 @@ def get_tile_geometry(image_tokens_list):
     else:
         return (max_tile, 1)
 
+""" Auxiliary structure to store tokens of the image file name.  """
 class ImageTokens:
+    # channel code
     channel = None
+    # tile number
     tile = -1
+    # path to the image
     path = ""
+    # well code, e.g. A1
     well = ""
 
 """
 Creates ImageTokens for a given path to an image
 Example file name: A - 1(fld 1 wv Cy5 - Cy5).tif
+Returns:
+    ImageTokens
 """
 def create_image_tokens(path):
     image_tokens = ImageTokens()
@@ -107,7 +279,9 @@ def create_image_tokens(path):
     
 """ 
 Creates ImageFileInfo for a given path to an image
-Example file name: A - 1(fld 1 wv Cy5 - Cy5).tif
+Example of the accepted file name: A - 1(fld 1 wv Cy5 - Cy5).tif
+Returns:
+   ImageFileInfo 
 """
 def create_image_info(image_tokens, tile_geometry):
     fieldNum = image_tokens.tile - 1
@@ -118,6 +292,11 @@ def create_image_info(image_tokens, tile_geometry):
     img.setWell(image_tokens.well)
     return img
 
+"""
+Tokenizes file names of all images in the directory.
+Returns: 
+  list of ImageTokens
+"""
 def parse_image_tokens(dir):
     image_tokens_list = []
     dir_path = dir.getPath()
@@ -133,7 +312,13 @@ def parse_image_tokens(dir):
             pass # extension not recognized    
     return image_tokens_list
 
-    
+"""
+Parameters:
+- image_tokens_list - list of ImageTokens for each image
+- tile_geometry - (rows, columns) tuple describing the matrix of tiles (aka fields or sites) in the well
+Returns: 
+  list of ImageFileInfo
+"""    
 def create_image_infos(image_tokens_list, tile_geometry):
     images = []
     for image_tokens in image_tokens_list:
@@ -141,108 +326,15 @@ def create_image_infos(image_tokens_list, tile_geometry):
         images.append(image)    
     return images
 
-# ------------
-# Generic utility
-# ------------
-
-def rfind(text, pattern):
-    ix = text.rfind(pattern)
-    ensurePatternFound(ix, text, pattern)
-    return ix
-
-def find(text, pattern):
-    ix = text.find(pattern)
-    ensurePatternFound(ix, text, pattern)
-    return ix
-
-def ensurePatternFound(ix, file, pattern):
-    if ix == -1:
-        raise Exception("Cannot find '" + pattern + "' pattern in file name '" + file + "'")    
-
-def extract_file_basename(filename):
-    lastDot = filename.rfind(".")
-    if lastDot != -1:
-        return filename[0:lastDot]
-    else:
-        return filename
-
-def get_file_ext(file):
-    return os.path.splitext(file)[1][1:].lower()
-
-def find_file_by_ext(incoming_file, expected_ext):
-    if not incoming_file.isDirectory():
-        return None
-    incoming_path = incoming_file.getPath()
-    for file in os.listdir(incoming_path):
-        ext = get_file_ext(file)
-        if ext.upper() == expected_ext.upper():
-            return File(incoming_path, file)
-    return None
-
-def find_dir(incoming_file, dir_name_marker):
-    if not incoming_file.isDirectory():
-        return None
-    incoming_path = incoming_file.getPath()
-    for file in os.listdir(incoming_path):
-        if dir_name_marker.upper() in file.upper():
-            return File(incoming_path, file)
-    return None
-
-def get_random_string():
-    return str(int(random.random()*1000000000))
-
-def get_tmp_dir(incoming, label):
-    dropbox_parent_dir = incoming.getParentFile().getParent()
-    tmp_dir = File(dropbox_parent_dir, "tmp")
-    if not os.path.exists(tmp_dir.getPath()):
-        os.mkdir(tmp_dir.getPath())
-    tmp_labeled_dir = File(tmp_dir, label + ".tmp." + get_random_string())
-    os.mkdir(tmp_labeled_dir.getPath())
-    return tmp_labeled_dir
-
-# ------------
-# Generic dataset registration
-# ------------
-
-class Dataset:
-    sample_code = ""
-    sample_space = ""
-    dataset_type = ""
-    file_format = ""
-    is_measured = True
-    parent_dataset_code = ""
-
-def set_dataset_details(dataset, registration_details):
-    data_set_info = registration_details.getDataSetInformation()
-    data_set_info.setSpaceCode(dataset.sample_space)
-    data_set_info.setSampleCode(dataset.sample_code)
-    if (dataset.parent_dataset_code):
-        data_set_info.setParentDataSetCodes([dataset.parent_dataset_code])
-    registration_details.setFileFormatType(FileFormatType(dataset.file_format));
-    registration_details.setDataSetType(DataSetType(dataset.dataset_type));
-    registration_details.setMeasuredData(dataset.is_measured);
-    return registration_details
-
-# ------------
-# Image dataset registration
-# ------------
-
-class ImageDataset: # extends Dataset
-    tile_num = -1
-    tile_rows_number = -1
-    tile_columns_number = -1
-    images = None
-    channels = None
-
-def get_available_channels(images):
-    channel_codes = {}
-    for image in images:
-        channel_codes[image.getChannelCode()] = 1
-    channels = []
-    for channelCode in channel_codes.keys():
-        channels.append(Channel(channelCode, channelCode))
-    return channels
+# ---------------------
 
+"""
+Extracts all images from the incoming directory.
+Parameters:
+    incoming - java.io.File, folder with images
+Returns:
+    ImageDataset
+"""
 def create_image_dataset(incoming):
     dataset = ImageDataset()
     
@@ -263,19 +355,15 @@ def create_image_dataset(incoming):
 
     return dataset
 
-def create_image_dataset_details(image_dataset):
-    registration_details = factory.createImageRegistrationDetails()
-    
-    set_dataset_details(image_dataset, registration_details)
-
-    data_set_info = registration_details.getDataSetInformation()
-    data_set_info.setTileGeometry(image_dataset.tile_rows_number, image_dataset.tile_columns_number)
-    data_set_info.setImages(image_dataset.images)
-    data_set_info.setChannels(image_dataset.channels)
-    return registration_details
-    
-# ---------------------
-
+"""
+Extracts all overlay images from the overlays_dir directory.
+Parameters:
+    overlays_dir - java.io.File, folder with overlay images
+    image_dataset - ImageDataset, image dataset to which the overlay dataset belongs
+    img_dataset_code - string, code of the image dataset to which the overlay dataset belongs
+Returns:
+    ImageDataset
+"""
 def create_overlay_dataset(overlays_dir, image_dataset, img_dataset_code):
     dataset = ImageDataset()
     
@@ -297,18 +385,34 @@ def create_overlay_dataset(overlays_dir, image_dataset, img_dataset_code):
 
     return dataset
 
+"""
+Creates registration details of the image overlays dataset.
+Parameters:
+    overlays_dir - java.io.File, folder with overlay images
+    image_dataset - ImageDataset, image dataset to which the overlay dataset belongs
+    img_dataset_code - string, code of the image dataset to which the overlay dataset belongs
+Returns:
+    DataSetRegistrationDetails
+"""
 def create_overlay_dataset_details(overlays_dir, image_dataset, img_dataset_code):
     overlay_dataset = create_overlay_dataset(overlays_dir, image_dataset, img_dataset_code)
     overlay_dataset_details = create_image_dataset_details(overlay_dataset)
 
     data_set_info = overlay_dataset_details.getDataSetInformation()
     config = ImageStorageConfiguraton.createDefault()
+    # channels will be connected to the dataset instead of being stored on the experiment level
     config.setStoreChannelsOnExperimentLevel(False)
     data_set_info.setImageStorageConfiguraton(config)
     return overlay_dataset_details
 
 # ---------------------
 
+"""
+Creates the analysis dataset description. 
+The dataset will be connected to the specified sample and parent dataset.
+Returns:
+    Dataset
+"""
 def create_analysis_dataset(sample_space, sample_code, parent_dataset_code):
     dataset = Dataset()
     
@@ -320,12 +424,18 @@ def create_analysis_dataset(sample_space, sample_code, parent_dataset_code):
     dataset.parent_dataset_code = parent_dataset_code
     return dataset
 
+"""
+Creates registration details of the analysis dataset.
+Returns:
+    DataSetRegistrationDetails
+"""
 def create_analysis_dataset_details(sample_space, sample_code, parent_dataset_code):
     analysis_registration_details = factory.createRegistrationDetails()
     analysis_dataset = create_analysis_dataset(sample_space, sample_code, parent_dataset_code)
     set_dataset_details(analysis_dataset, analysis_registration_details)
     return analysis_registration_details
 
+""" registers sample if it does not exist already """
 def register_sample_if_necessary(space_code, project_code, experiment_code, sample_code):   
     openbis = state.getOpenBisService()
     sampleIdentifier = SampleIdentifier.create(space_code, sample_code)
-- 
GitLab