From 8fd4129ee9b3b80c041233cbdd3f232a3904012a Mon Sep 17 00:00:00 2001 From: tpylak <tpylak> Date: Tue, 1 Feb 2011 22:35:16 +0000 Subject: [PATCH] LMS-1992 template dropbox: add documentation SVN: 19688 --- screening/etc/data-set-handler.py | 362 +++++++++++++++++++----------- 1 file changed, 236 insertions(+), 126 deletions(-) diff --git a/screening/etc/data-set-handler.py b/screening/etc/data-set-handler.py index 9bef1461d4d..89e2a271994 100755 --- a/screening/etc/data-set-handler.py +++ b/screening/etc/data-set-handler.py @@ -12,36 +12,194 @@ from ch.systemsx.cisd.openbis.dss.etl.dto.api.v1 import * from ch.systemsx.cisd.openbis.dss.etl.custom.geexplorer import GEExplorerImageAnalysisResultParser from java.io import File +# ------------ +# Generic utility +# ------------ + +""" +Finds first occurence of the patter from the right. +Throws exception if the pattern cannot be found. +""" +def rfind(text, pattern): + ix = text.rfind(pattern) + ensurePatternFound(ix, text, pattern) + return ix + +""" +Finds first occurence of the patter from the left. +Throws exception if the pattern cannot be found. +""" +def find(text, pattern): + ix = text.find(pattern) + ensurePatternFound(ix, text, pattern) + return ix + +def ensurePatternFound(ix, file, pattern): + if ix == -1: + raise Exception("Cannot find '" + pattern + "' pattern in file name '" + file + "'") + +""" Returns: name of the file without the extension """ +def extract_file_basename(filename): + lastDot = filename.rfind(".") + if lastDot != -1: + return filename[0:lastDot] + else: + return filename + +""" Returns: extension of the file """ +def get_file_ext(file): + return os.path.splitext(file)[1][1:].lower() + +""" Returns: java.io.File - first file with the specified extension or None if no file matches """ +def find_file_by_ext(incoming_file, expected_ext): + if not incoming_file.isDirectory(): + return None + incoming_path = incoming_file.getPath() + for file in os.listdir(incoming_path): + ext = get_file_ext(file) + if ext.upper() == expected_ext.upper(): + return File(incoming_path, file) + return None + +""" Returns: java.io.File - subdirectory which contains the specified marker in the name """ +def find_dir(incoming_file, dir_name_marker): + if not incoming_file.isDirectory(): + return None + incoming_path = incoming_file.getPath() + for file in os.listdir(incoming_path): + if dir_name_marker.upper() in file.upper(): + return File(incoming_path, file) + return None + +def get_random_string(): + return str(int(random.random()*1000000000)) + +""" +Creates a temporary directory two levels above the specified incoming file. +The name of the directory will contain the specified label and a random text. +Returns: + java.io.File - path to the temporary directory +""" +def get_tmp_dir(incoming, label): + dropbox_parent_dir = incoming.getParentFile().getParent() + tmp_dir = File(dropbox_parent_dir, "tmp") + if not os.path.exists(tmp_dir.getPath()): + os.mkdir(tmp_dir.getPath()) + tmp_labeled_dir = File(tmp_dir, label + ".tmp." + get_random_string()) + os.mkdir(tmp_labeled_dir.getPath()) + return tmp_labeled_dir + +# ------------ +# Generic dataset registration +# ------------ + +""" Basic attributes of a dataset connected to a sample and optionally to one parent dataset """ +class Dataset: + # code of the dataset type and file format for the new dataset + dataset_type = "" + file_format = "" + # attributes of the connected sample + sample_code = "" + sample_space = "" + # attributes of the parent dataset - optional + parent_dataset_code = "" + # boolean which marks, if a dataset is measured or derived from measured data + is_measured = True + +""" +Sets registartion details using the specified Dataset. +Parameters: + dataset - basic information about the dataset (see Dataset type) + registration_details - DataSetRegistrationDetails which will be filled (see javadoc) +""" +def set_dataset_details(dataset, registration_details): + data_set_info = registration_details.getDataSetInformation() + data_set_info.setSpaceCode(dataset.sample_space) + data_set_info.setSampleCode(dataset.sample_code) + if (dataset.parent_dataset_code): + data_set_info.setParentDataSetCodes([dataset.parent_dataset_code]) + registration_details.setFileFormatType(FileFormatType(dataset.file_format)); + registration_details.setDataSetType(DataSetType(dataset.dataset_type)); + registration_details.setMeasuredData(dataset.is_measured); + +# ------------ +# Image dataset registration +# ------------ + +""" +Extends basic information about dataset with image dataset specific data. +""" +class ImageDataset(Dataset): + # location of the tile on the well matrix + tile_rows_number = -1 + tile_columns_number = -1 + # list of ImageFileInfo objects. Describes each image in the dataset. + images = None + # list of Channel objects. Describes all channels of the dataset. + channels = None + +""" +Auxiliary function to extract all channel codes used by specified images. +The channel label will be equal to channel code. +Parameters: + images - list of ImageFileInfo +Returns: + list of Channel +""" +def get_available_channels(images): + channel_codes = {} + for image in images: + channel_codes[image.getChannelCode()] = 1 + channels = [] + for channelCode in channel_codes.keys(): + channels.append(Channel(channelCode, channelCode)) + return channels + +""" +Converts ImageDataset to DataSetRegistrationDetails +Parameters: + image_dataset - ImageDataset +Returns: + DataSetRegistrationDetails +""" +def create_image_dataset_details(image_dataset): + registration_details = factory.createImageRegistrationDetails() + + set_dataset_details(image_dataset, registration_details) + + data_set_info = registration_details.getDataSetInformation() + data_set_info.setTileGeometry(image_dataset.tile_rows_number, image_dataset.tile_columns_number) + data_set_info.setImages(image_dataset.images) + data_set_info.setChannels(image_dataset.channels) + return registration_details + # ------------ # Dropbox specific image dataset registration. You may want to modify this part. # ------------ """ type of the new image dataset """ IMAGE_DATASET_TYPE = "HCS_IMAGE" -""" file format of files in a new image dataset """ +""" file format code of files in a new image dataset """ IMAGE_FILE_FORMAT = "TIFF" + """ type of the new analysis dataset """ ANALYSIS_DATASET_TYPE = "HCS_IMAGE_ANALYSIS_DATA" -""" file format of the analysis dataset file """ +""" file format of the analysis dataset """ ANALYSIS_FILE_FORMAT = "CSV" +""" type of the new image overlay dataset """ OVERLAY_IMAGE_DATASET_TYPE = "HCS_IMAGE_SEGMENTATION_OVERLAY" +""" file format of the image overlay dataset """ OVERLAY_IMAGE_FILE_FORMAT = "PNG" """ space where the plate for which the dataset has been acquired exist """ PLATE_SPACE = "DEMO" +""" only files with these extensions will be recognized as images """ RECOGNIZED_IMAGES_EXTENSIONS = ["tiff", "tif", "png", "gif", "jpg", "jpeg"] # --------- -""" sample type code of the plate, needed if a new sample is registered automatically """ -PLATE_TYPE_CODE = "PLATE" -""" project and experiment where new plates will be registered """ -DEFAULT_PROJECT_CODE = "TEST" -DEFAULT_EXPERIMENT_CODE = "SANOFI" -PLATE_GEOMETRY_PROPERTY_CODE = "$PLATE_GEOMETRY" -PLATE_GEOMETRY = "384_WELLS_16X24" """ extracts code of the sample from the directory name """ def extract_sample_code(incoming_name): @@ -52,19 +210,26 @@ def extract_sample_code(incoming_name): code = file_basename return code +""" +For a given tile number and tiles geometry returns a (x,y) tuple which describes where the tile +is located on the well. +""" def get_tile_coords(tile_num, tile_geometry): columns = tile_geometry[1] row = ((tile_num - 1) / columns) + 1 col = ((tile_num - 1) % columns) + 1 return (row, col) +""" Returns: integer - maximal tile number """ def get_max_tile_number(image_tokens_list): max_tile = 0 for image_tokens in image_tokens_list: max_tile = max(max_tile, image_tokens.tile) return max_tile -""" returns (rows, columns) """ +""" +Returns: (rows, columns) tuple describing the matrix of tiles (aka fields or sides) in the well +""" def get_tile_geometry(image_tokens_list): max_tile = get_max_tile_number(image_tokens_list) if max_tile % 4 == 0 and max_tile != 4: @@ -76,15 +241,22 @@ def get_tile_geometry(image_tokens_list): else: return (max_tile, 1) +""" Auxiliary structure to store tokens of the image file name. """ class ImageTokens: + # channel code channel = None + # tile number tile = -1 + # path to the image path = "" + # well code, e.g. A1 well = "" """ Creates ImageTokens for a given path to an image Example file name: A - 1(fld 1 wv Cy5 - Cy5).tif +Returns: + ImageTokens """ def create_image_tokens(path): image_tokens = ImageTokens() @@ -107,7 +279,9 @@ def create_image_tokens(path): """ Creates ImageFileInfo for a given path to an image -Example file name: A - 1(fld 1 wv Cy5 - Cy5).tif +Example of the accepted file name: A - 1(fld 1 wv Cy5 - Cy5).tif +Returns: + ImageFileInfo """ def create_image_info(image_tokens, tile_geometry): fieldNum = image_tokens.tile - 1 @@ -118,6 +292,11 @@ def create_image_info(image_tokens, tile_geometry): img.setWell(image_tokens.well) return img +""" +Tokenizes file names of all images in the directory. +Returns: + list of ImageTokens +""" def parse_image_tokens(dir): image_tokens_list = [] dir_path = dir.getPath() @@ -133,7 +312,13 @@ def parse_image_tokens(dir): pass # extension not recognized return image_tokens_list - +""" +Parameters: +- image_tokens_list - list of ImageTokens for each image +- tile_geometry - (rows, columns) tuple describing the matrix of tiles (aka fields or sides) in the well +Returns: + list of ImageFileInfo +""" def create_image_infos(image_tokens_list, tile_geometry): images = [] for image_tokens in image_tokens_list: @@ -141,108 +326,15 @@ def create_image_infos(image_tokens_list, tile_geometry): images.append(image) return images -# ------------ -# Generic utility -# ------------ - -def rfind(text, pattern): - ix = text.rfind(pattern) - ensurePatternFound(ix, text, pattern) - return ix - -def find(text, pattern): - ix = text.find(pattern) - ensurePatternFound(ix, text, pattern) - return ix - -def ensurePatternFound(ix, file, pattern): - if ix == -1: - raise Exception("Cannot find '" + pattern + "' pattern in file name '" + file + "'") - -def extract_file_basename(filename): - lastDot = filename.rfind(".") - if lastDot != -1: - return filename[0:lastDot] - else: - return filename - -def get_file_ext(file): - return os.path.splitext(file)[1][1:].lower() - -def find_file_by_ext(incoming_file, expected_ext): - if not incoming_file.isDirectory(): - return None - incoming_path = incoming_file.getPath() - for file in os.listdir(incoming_path): - ext = get_file_ext(file) - if ext.upper() == expected_ext.upper(): - return File(incoming_path, file) - return None - -def find_dir(incoming_file, dir_name_marker): - if not incoming_file.isDirectory(): - return None - incoming_path = incoming_file.getPath() - for file in os.listdir(incoming_path): - if dir_name_marker.upper() in file.upper(): - return File(incoming_path, file) - return None - -def get_random_string(): - return str(int(random.random()*1000000000)) - -def get_tmp_dir(incoming, label): - dropbox_parent_dir = incoming.getParentFile().getParent() - tmp_dir = File(dropbox_parent_dir, "tmp") - if not os.path.exists(tmp_dir.getPath()): - os.mkdir(tmp_dir.getPath()) - tmp_labeled_dir = File(tmp_dir, label + ".tmp." + get_random_string()) - os.mkdir(tmp_labeled_dir.getPath()) - return tmp_labeled_dir - -# ------------ -# Generic dataset registration -# ------------ - -class Dataset: - sample_code = "" - sample_space = "" - dataset_type = "" - file_format = "" - is_measured = True - parent_dataset_code = "" - -def set_dataset_details(dataset, registration_details): - data_set_info = registration_details.getDataSetInformation() - data_set_info.setSpaceCode(dataset.sample_space) - data_set_info.setSampleCode(dataset.sample_code) - if (dataset.parent_dataset_code): - data_set_info.setParentDataSetCodes([dataset.parent_dataset_code]) - registration_details.setFileFormatType(FileFormatType(dataset.file_format)); - registration_details.setDataSetType(DataSetType(dataset.dataset_type)); - registration_details.setMeasuredData(dataset.is_measured); - return registration_details - -# ------------ -# Image dataset registration -# ------------ - -class ImageDataset: # extends Dataset - tile_num = -1 - tile_rows_number = -1 - tile_columns_number = -1 - images = None - channels = None - -def get_available_channels(images): - channel_codes = {} - for image in images: - channel_codes[image.getChannelCode()] = 1 - channels = [] - for channelCode in channel_codes.keys(): - channels.append(Channel(channelCode, channelCode)) - return channels +# --------------------- +""" +Extracts all images from the incoming directory. +Parameters: + incoming - java.io.File, folder with images +Returns: + ImageDataset +""" def create_image_dataset(incoming): dataset = ImageDataset() @@ -263,19 +355,15 @@ def create_image_dataset(incoming): return dataset -def create_image_dataset_details(image_dataset): - registration_details = factory.createImageRegistrationDetails() - - set_dataset_details(image_dataset, registration_details) - - data_set_info = registration_details.getDataSetInformation() - data_set_info.setTileGeometry(image_dataset.tile_rows_number, image_dataset.tile_columns_number) - data_set_info.setImages(image_dataset.images) - data_set_info.setChannels(image_dataset.channels) - return registration_details - -# --------------------- - +""" +Extracts all overlay images from the overlays_dir directory. +Parameters: + overlays_dir - java.io.File, folder with + image_dataset - ImageDataset, image dataset to which the overlay dataset belongs + img_dataset_code - string, code of the image dataset to which the overlay dataset belongs +Returns: + ImageDataset +""" def create_overlay_dataset(overlays_dir, image_dataset, img_dataset_code): dataset = ImageDataset() @@ -297,18 +385,34 @@ def create_overlay_dataset(overlays_dir, image_dataset, img_dataset_code): return dataset +""" +Creates registration details of the image overlays dataset. +Parameters: + overlays_dir - java.io.File, folder with + image_dataset - ImageDataset, image dataset to which the overlay dataset belongs + img_dataset_code - string, code of the image dataset to which the overlay dataset belongs +Returns: + DataSetRegistrationDetails +""" def create_overlay_dataset_details(overlays_dir, image_dataset, img_dataset_code): overlay_dataset = create_overlay_dataset(overlays_dir, image_dataset, img_dataset_code) overlay_dataset_details = create_image_dataset_details(overlay_dataset) data_set_info = overlay_dataset_details.getDataSetInformation() config = ImageStorageConfiguraton.createDefault() + # channels will be connected to the dataset config.setStoreChannelsOnExperimentLevel(False) data_set_info.setImageStorageConfiguraton(config) return overlay_dataset_details # --------------------- +""" +Creates the analysis dataset description. +The dataset will be connected to the specified sample and parent dataset. +Returns: + Dataset +""" def create_analysis_dataset(sample_space, sample_code, parent_dataset_code): dataset = Dataset() @@ -320,12 +424,18 @@ def create_analysis_dataset(sample_space, sample_code, parent_dataset_code): dataset.parent_dataset_code = parent_dataset_code return dataset +""" +Creates registration details of the analysis dataset. +Returns: + DataSetRegistrationDetails +""" def create_analysis_dataset_details(sample_space, sample_code, parent_dataset_code): analysis_registration_details = factory.createRegistrationDetails() analysis_dataset = create_analysis_dataset(sample_space, sample_code, parent_dataset_code) set_dataset_details(analysis_dataset, analysis_registration_details) return analysis_registration_details +""" registers sample if it does not exist already """ def register_sample_if_necessary(space_code, project_code, experiment_code, sample_code): openbis = state.getOpenBisService() sampleIdentifier = SampleIdentifier.create(space_code, sample_code) -- GitLab