Skip to content
Snippets Groups Projects
Commit 8fd4129e authored by tpylak's avatar tpylak
Browse files

LMS-1992 template dropbox: add documentation

SVN: 19688
parent 416739a2
No related branches found
No related tags found
No related merge requests found
...@@ -12,36 +12,194 @@ from ch.systemsx.cisd.openbis.dss.etl.dto.api.v1 import * ...@@ -12,36 +12,194 @@ from ch.systemsx.cisd.openbis.dss.etl.dto.api.v1 import *
from ch.systemsx.cisd.openbis.dss.etl.custom.geexplorer import GEExplorerImageAnalysisResultParser from ch.systemsx.cisd.openbis.dss.etl.custom.geexplorer import GEExplorerImageAnalysisResultParser
from java.io import File from java.io import File
# ------------
# Generic utility
# ------------
"""
Finds first occurence of the patter from the right.
Throws exception if the pattern cannot be found.
"""
def rfind(text, pattern):
ix = text.rfind(pattern)
ensurePatternFound(ix, text, pattern)
return ix
"""
Finds first occurence of the patter from the left.
Throws exception if the pattern cannot be found.
"""
def find(text, pattern):
ix = text.find(pattern)
ensurePatternFound(ix, text, pattern)
return ix
def ensurePatternFound(ix, file, pattern):
if ix == -1:
raise Exception("Cannot find '" + pattern + "' pattern in file name '" + file + "'")
""" Returns: name of the file without the extension """
def extract_file_basename(filename):
lastDot = filename.rfind(".")
if lastDot != -1:
return filename[0:lastDot]
else:
return filename
""" Returns: extension of the file """
def get_file_ext(file):
return os.path.splitext(file)[1][1:].lower()
""" Returns: java.io.File - first file with the specified extension or None if no file matches """
def find_file_by_ext(incoming_file, expected_ext):
if not incoming_file.isDirectory():
return None
incoming_path = incoming_file.getPath()
for file in os.listdir(incoming_path):
ext = get_file_ext(file)
if ext.upper() == expected_ext.upper():
return File(incoming_path, file)
return None
""" Returns: java.io.File - subdirectory which contains the specified marker in the name """
def find_dir(incoming_file, dir_name_marker):
if not incoming_file.isDirectory():
return None
incoming_path = incoming_file.getPath()
for file in os.listdir(incoming_path):
if dir_name_marker.upper() in file.upper():
return File(incoming_path, file)
return None
def get_random_string():
return str(int(random.random()*1000000000))
"""
Creates a temporary directory two levels above the specified incoming file.
The name of the directory will contain the specified label and a random text.
Returns:
java.io.File - path to the temporary directory
"""
def get_tmp_dir(incoming, label):
dropbox_parent_dir = incoming.getParentFile().getParent()
tmp_dir = File(dropbox_parent_dir, "tmp")
if not os.path.exists(tmp_dir.getPath()):
os.mkdir(tmp_dir.getPath())
tmp_labeled_dir = File(tmp_dir, label + ".tmp." + get_random_string())
os.mkdir(tmp_labeled_dir.getPath())
return tmp_labeled_dir
# ------------
# Generic dataset registration
# ------------
""" Basic attributes of a dataset connected to a sample and optionally to one parent dataset """
class Dataset:
# code of the dataset type and file format for the new dataset
dataset_type = ""
file_format = ""
# attributes of the connected sample
sample_code = ""
sample_space = ""
# attributes of the parent dataset - optional
parent_dataset_code = ""
# boolean which marks, if a dataset is measured or derived from measured data
is_measured = True
"""
Sets registartion details using the specified Dataset.
Parameters:
dataset - basic information about the dataset (see Dataset type)
registration_details - DataSetRegistrationDetails which will be filled (see javadoc)
"""
def set_dataset_details(dataset, registration_details):
data_set_info = registration_details.getDataSetInformation()
data_set_info.setSpaceCode(dataset.sample_space)
data_set_info.setSampleCode(dataset.sample_code)
if (dataset.parent_dataset_code):
data_set_info.setParentDataSetCodes([dataset.parent_dataset_code])
registration_details.setFileFormatType(FileFormatType(dataset.file_format));
registration_details.setDataSetType(DataSetType(dataset.dataset_type));
registration_details.setMeasuredData(dataset.is_measured);
# ------------
# Image dataset registration
# ------------
"""
Extends basic information about dataset with image dataset specific data.
"""
class ImageDataset(Dataset):
# location of the tile on the well matrix
tile_rows_number = -1
tile_columns_number = -1
# list of ImageFileInfo objects. Describes each image in the dataset.
images = None
# list of Channel objects. Describes all channels of the dataset.
channels = None
"""
Auxiliary function to extract all channel codes used by specified images.
The channel label will be equal to channel code.
Parameters:
images - list of ImageFileInfo
Returns:
list of Channel
"""
def get_available_channels(images):
channel_codes = {}
for image in images:
channel_codes[image.getChannelCode()] = 1
channels = []
for channelCode in channel_codes.keys():
channels.append(Channel(channelCode, channelCode))
return channels
"""
Converts ImageDataset to DataSetRegistrationDetails
Parameters:
image_dataset - ImageDataset
Returns:
DataSetRegistrationDetails
"""
def create_image_dataset_details(image_dataset):
registration_details = factory.createImageRegistrationDetails()
set_dataset_details(image_dataset, registration_details)
data_set_info = registration_details.getDataSetInformation()
data_set_info.setTileGeometry(image_dataset.tile_rows_number, image_dataset.tile_columns_number)
data_set_info.setImages(image_dataset.images)
data_set_info.setChannels(image_dataset.channels)
return registration_details
# ------------ # ------------
# Dropbox specific image dataset registration. You may want to modify this part. # Dropbox specific image dataset registration. You may want to modify this part.
# ------------ # ------------
""" type of the new image dataset """ """ type of the new image dataset """
IMAGE_DATASET_TYPE = "HCS_IMAGE" IMAGE_DATASET_TYPE = "HCS_IMAGE"
""" file format of files in a new image dataset """ """ file format code of files in a new image dataset """
IMAGE_FILE_FORMAT = "TIFF" IMAGE_FILE_FORMAT = "TIFF"
""" type of the new analysis dataset """ """ type of the new analysis dataset """
ANALYSIS_DATASET_TYPE = "HCS_IMAGE_ANALYSIS_DATA" ANALYSIS_DATASET_TYPE = "HCS_IMAGE_ANALYSIS_DATA"
""" file format of the analysis dataset file """ """ file format of the analysis dataset """
ANALYSIS_FILE_FORMAT = "CSV" ANALYSIS_FILE_FORMAT = "CSV"
""" type of the new image overlay dataset """
OVERLAY_IMAGE_DATASET_TYPE = "HCS_IMAGE_SEGMENTATION_OVERLAY" OVERLAY_IMAGE_DATASET_TYPE = "HCS_IMAGE_SEGMENTATION_OVERLAY"
""" file format of the image overlay dataset """
OVERLAY_IMAGE_FILE_FORMAT = "PNG" OVERLAY_IMAGE_FILE_FORMAT = "PNG"
""" space where the plate for which the dataset has been acquired exist """ """ space where the plate for which the dataset has been acquired exist """
PLATE_SPACE = "DEMO" PLATE_SPACE = "DEMO"
""" only files with these extensions will be recognized as images """
RECOGNIZED_IMAGES_EXTENSIONS = ["tiff", "tif", "png", "gif", "jpg", "jpeg"] RECOGNIZED_IMAGES_EXTENSIONS = ["tiff", "tif", "png", "gif", "jpg", "jpeg"]
# --------- # ---------
""" sample type code of the plate, needed if a new sample is registered automatically """
PLATE_TYPE_CODE = "PLATE"
""" project and experiment where new plates will be registered """
DEFAULT_PROJECT_CODE = "TEST"
DEFAULT_EXPERIMENT_CODE = "SANOFI"
PLATE_GEOMETRY_PROPERTY_CODE = "$PLATE_GEOMETRY"
PLATE_GEOMETRY = "384_WELLS_16X24"
""" extracts code of the sample from the directory name """ """ extracts code of the sample from the directory name """
def extract_sample_code(incoming_name): def extract_sample_code(incoming_name):
...@@ -52,19 +210,26 @@ def extract_sample_code(incoming_name): ...@@ -52,19 +210,26 @@ def extract_sample_code(incoming_name):
code = file_basename code = file_basename
return code return code
"""
For a given tile number and tiles geometry returns a (x,y) tuple which describes where the tile
is located on the well.
"""
def get_tile_coords(tile_num, tile_geometry): def get_tile_coords(tile_num, tile_geometry):
columns = tile_geometry[1] columns = tile_geometry[1]
row = ((tile_num - 1) / columns) + 1 row = ((tile_num - 1) / columns) + 1
col = ((tile_num - 1) % columns) + 1 col = ((tile_num - 1) % columns) + 1
return (row, col) return (row, col)
""" Returns: integer - maximal tile number """
def get_max_tile_number(image_tokens_list): def get_max_tile_number(image_tokens_list):
max_tile = 0 max_tile = 0
for image_tokens in image_tokens_list: for image_tokens in image_tokens_list:
max_tile = max(max_tile, image_tokens.tile) max_tile = max(max_tile, image_tokens.tile)
return max_tile return max_tile
""" returns (rows, columns) """ """
Returns: (rows, columns) tuple describing the matrix of tiles (aka fields or sides) in the well
"""
def get_tile_geometry(image_tokens_list): def get_tile_geometry(image_tokens_list):
max_tile = get_max_tile_number(image_tokens_list) max_tile = get_max_tile_number(image_tokens_list)
if max_tile % 4 == 0 and max_tile != 4: if max_tile % 4 == 0 and max_tile != 4:
...@@ -76,15 +241,22 @@ def get_tile_geometry(image_tokens_list): ...@@ -76,15 +241,22 @@ def get_tile_geometry(image_tokens_list):
else: else:
return (max_tile, 1) return (max_tile, 1)
""" Auxiliary structure to store tokens of the image file name. """
class ImageTokens: class ImageTokens:
# channel code
channel = None channel = None
# tile number
tile = -1 tile = -1
# path to the image
path = "" path = ""
# well code, e.g. A1
well = "" well = ""
""" """
Creates ImageTokens for a given path to an image Creates ImageTokens for a given path to an image
Example file name: A - 1(fld 1 wv Cy5 - Cy5).tif Example file name: A - 1(fld 1 wv Cy5 - Cy5).tif
Returns:
ImageTokens
""" """
def create_image_tokens(path): def create_image_tokens(path):
image_tokens = ImageTokens() image_tokens = ImageTokens()
...@@ -107,7 +279,9 @@ def create_image_tokens(path): ...@@ -107,7 +279,9 @@ def create_image_tokens(path):
""" """
Creates ImageFileInfo for a given path to an image Creates ImageFileInfo for a given path to an image
Example file name: A - 1(fld 1 wv Cy5 - Cy5).tif Example of the accepted file name: A - 1(fld 1 wv Cy5 - Cy5).tif
Returns:
ImageFileInfo
""" """
def create_image_info(image_tokens, tile_geometry): def create_image_info(image_tokens, tile_geometry):
fieldNum = image_tokens.tile - 1 fieldNum = image_tokens.tile - 1
...@@ -118,6 +292,11 @@ def create_image_info(image_tokens, tile_geometry): ...@@ -118,6 +292,11 @@ def create_image_info(image_tokens, tile_geometry):
img.setWell(image_tokens.well) img.setWell(image_tokens.well)
return img return img
"""
Tokenizes file names of all images in the directory.
Returns:
list of ImageTokens
"""
def parse_image_tokens(dir): def parse_image_tokens(dir):
image_tokens_list = [] image_tokens_list = []
dir_path = dir.getPath() dir_path = dir.getPath()
...@@ -133,7 +312,13 @@ def parse_image_tokens(dir): ...@@ -133,7 +312,13 @@ def parse_image_tokens(dir):
pass # extension not recognized pass # extension not recognized
return image_tokens_list return image_tokens_list
"""
Parameters:
- image_tokens_list - list of ImageTokens for each image
- tile_geometry - (rows, columns) tuple describing the matrix of tiles (aka fields or sides) in the well
Returns:
list of ImageFileInfo
"""
def create_image_infos(image_tokens_list, tile_geometry): def create_image_infos(image_tokens_list, tile_geometry):
images = [] images = []
for image_tokens in image_tokens_list: for image_tokens in image_tokens_list:
...@@ -141,108 +326,15 @@ def create_image_infos(image_tokens_list, tile_geometry): ...@@ -141,108 +326,15 @@ def create_image_infos(image_tokens_list, tile_geometry):
images.append(image) images.append(image)
return images return images
# ------------ # ---------------------
# Generic utility
# ------------
def rfind(text, pattern):
ix = text.rfind(pattern)
ensurePatternFound(ix, text, pattern)
return ix
def find(text, pattern):
ix = text.find(pattern)
ensurePatternFound(ix, text, pattern)
return ix
def ensurePatternFound(ix, file, pattern):
if ix == -1:
raise Exception("Cannot find '" + pattern + "' pattern in file name '" + file + "'")
def extract_file_basename(filename):
lastDot = filename.rfind(".")
if lastDot != -1:
return filename[0:lastDot]
else:
return filename
def get_file_ext(file):
return os.path.splitext(file)[1][1:].lower()
def find_file_by_ext(incoming_file, expected_ext):
if not incoming_file.isDirectory():
return None
incoming_path = incoming_file.getPath()
for file in os.listdir(incoming_path):
ext = get_file_ext(file)
if ext.upper() == expected_ext.upper():
return File(incoming_path, file)
return None
def find_dir(incoming_file, dir_name_marker):
if not incoming_file.isDirectory():
return None
incoming_path = incoming_file.getPath()
for file in os.listdir(incoming_path):
if dir_name_marker.upper() in file.upper():
return File(incoming_path, file)
return None
def get_random_string():
return str(int(random.random()*1000000000))
def get_tmp_dir(incoming, label):
dropbox_parent_dir = incoming.getParentFile().getParent()
tmp_dir = File(dropbox_parent_dir, "tmp")
if not os.path.exists(tmp_dir.getPath()):
os.mkdir(tmp_dir.getPath())
tmp_labeled_dir = File(tmp_dir, label + ".tmp." + get_random_string())
os.mkdir(tmp_labeled_dir.getPath())
return tmp_labeled_dir
# ------------
# Generic dataset registration
# ------------
class Dataset:
sample_code = ""
sample_space = ""
dataset_type = ""
file_format = ""
is_measured = True
parent_dataset_code = ""
def set_dataset_details(dataset, registration_details):
data_set_info = registration_details.getDataSetInformation()
data_set_info.setSpaceCode(dataset.sample_space)
data_set_info.setSampleCode(dataset.sample_code)
if (dataset.parent_dataset_code):
data_set_info.setParentDataSetCodes([dataset.parent_dataset_code])
registration_details.setFileFormatType(FileFormatType(dataset.file_format));
registration_details.setDataSetType(DataSetType(dataset.dataset_type));
registration_details.setMeasuredData(dataset.is_measured);
return registration_details
# ------------
# Image dataset registration
# ------------
class ImageDataset: # extends Dataset
tile_num = -1
tile_rows_number = -1
tile_columns_number = -1
images = None
channels = None
def get_available_channels(images):
channel_codes = {}
for image in images:
channel_codes[image.getChannelCode()] = 1
channels = []
for channelCode in channel_codes.keys():
channels.append(Channel(channelCode, channelCode))
return channels
"""
Extracts all images from the incoming directory.
Parameters:
incoming - java.io.File, folder with images
Returns:
ImageDataset
"""
def create_image_dataset(incoming): def create_image_dataset(incoming):
dataset = ImageDataset() dataset = ImageDataset()
...@@ -263,19 +355,15 @@ def create_image_dataset(incoming): ...@@ -263,19 +355,15 @@ def create_image_dataset(incoming):
return dataset return dataset
def create_image_dataset_details(image_dataset): """
registration_details = factory.createImageRegistrationDetails() Extracts all overlay images from the overlays_dir directory.
Parameters:
set_dataset_details(image_dataset, registration_details) overlays_dir - java.io.File, folder with
image_dataset - ImageDataset, image dataset to which the overlay dataset belongs
data_set_info = registration_details.getDataSetInformation() img_dataset_code - string, code of the image dataset to which the overlay dataset belongs
data_set_info.setTileGeometry(image_dataset.tile_rows_number, image_dataset.tile_columns_number) Returns:
data_set_info.setImages(image_dataset.images) ImageDataset
data_set_info.setChannels(image_dataset.channels) """
return registration_details
# ---------------------
def create_overlay_dataset(overlays_dir, image_dataset, img_dataset_code): def create_overlay_dataset(overlays_dir, image_dataset, img_dataset_code):
dataset = ImageDataset() dataset = ImageDataset()
...@@ -297,18 +385,34 @@ def create_overlay_dataset(overlays_dir, image_dataset, img_dataset_code): ...@@ -297,18 +385,34 @@ def create_overlay_dataset(overlays_dir, image_dataset, img_dataset_code):
return dataset return dataset
"""
Creates registration details of the image overlays dataset.
Parameters:
overlays_dir - java.io.File, folder with
image_dataset - ImageDataset, image dataset to which the overlay dataset belongs
img_dataset_code - string, code of the image dataset to which the overlay dataset belongs
Returns:
DataSetRegistrationDetails
"""
def create_overlay_dataset_details(overlays_dir, image_dataset, img_dataset_code): def create_overlay_dataset_details(overlays_dir, image_dataset, img_dataset_code):
overlay_dataset = create_overlay_dataset(overlays_dir, image_dataset, img_dataset_code) overlay_dataset = create_overlay_dataset(overlays_dir, image_dataset, img_dataset_code)
overlay_dataset_details = create_image_dataset_details(overlay_dataset) overlay_dataset_details = create_image_dataset_details(overlay_dataset)
data_set_info = overlay_dataset_details.getDataSetInformation() data_set_info = overlay_dataset_details.getDataSetInformation()
config = ImageStorageConfiguraton.createDefault() config = ImageStorageConfiguraton.createDefault()
# channels will be connected to the dataset
config.setStoreChannelsOnExperimentLevel(False) config.setStoreChannelsOnExperimentLevel(False)
data_set_info.setImageStorageConfiguraton(config) data_set_info.setImageStorageConfiguraton(config)
return overlay_dataset_details return overlay_dataset_details
# --------------------- # ---------------------
"""
Creates the analysis dataset description.
The dataset will be connected to the specified sample and parent dataset.
Returns:
Dataset
"""
def create_analysis_dataset(sample_space, sample_code, parent_dataset_code): def create_analysis_dataset(sample_space, sample_code, parent_dataset_code):
dataset = Dataset() dataset = Dataset()
...@@ -320,12 +424,18 @@ def create_analysis_dataset(sample_space, sample_code, parent_dataset_code): ...@@ -320,12 +424,18 @@ def create_analysis_dataset(sample_space, sample_code, parent_dataset_code):
dataset.parent_dataset_code = parent_dataset_code dataset.parent_dataset_code = parent_dataset_code
return dataset return dataset
"""
Creates registration details of the analysis dataset.
Returns:
DataSetRegistrationDetails
"""
def create_analysis_dataset_details(sample_space, sample_code, parent_dataset_code): def create_analysis_dataset_details(sample_space, sample_code, parent_dataset_code):
analysis_registration_details = factory.createRegistrationDetails() analysis_registration_details = factory.createRegistrationDetails()
analysis_dataset = create_analysis_dataset(sample_space, sample_code, parent_dataset_code) analysis_dataset = create_analysis_dataset(sample_space, sample_code, parent_dataset_code)
set_dataset_details(analysis_dataset, analysis_registration_details) set_dataset_details(analysis_dataset, analysis_registration_details)
return analysis_registration_details return analysis_registration_details
""" registers sample if it does not exist already """
def register_sample_if_necessary(space_code, project_code, experiment_code, sample_code): def register_sample_if_necessary(space_code, project_code, experiment_code, sample_code):
openbis = state.getOpenBisService() openbis = state.getOpenBisService()
sampleIdentifier = SampleIdentifier.create(space_code, sample_code) sampleIdentifier = SampleIdentifier.create(space_code, sample_code)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment