Skip to content
Snippets Groups Projects
Commit f36de667 authored by barillac's avatar barillac
Browse files

sinergia dropbox scripts

SVN: 28275
parent 3931b36d
No related branches found
No related tags found
No related merge requests found
import os, glob, re, csv, time, shutil, sys
from time import *
from datetime import *
# Path of the incoming dropbox folder, taken from the first CLI argument.
incoming =sys.argv[1]
#The siRNA csv files are renamed to use the same well names as in openBIS, instead of w1, w2, etc
def _control_well_name(token):
    # w1 -> A1, w2 -> A2; any other token -> '' (matches the original if-chain).
    return {"w1": "A1", "w2": "A2"}.get(token, "")

def _measure_well_name(token):
    # w3..w24 -> A3..D6 on a 4-row x 6-column plate; anything else -> ''.
    # Replaces the original 22-branch if-chain with the arithmetic it encoded.
    if re.match(r'^w\d+$', token):
        n = int(token[1:])
        if 3 <= n <= 24:
            return "ABCD"[(n - 1) // 6] + str((n - 1) % 6 + 1)
    return ""

def get_sirna_wells(incoming):
    """Rename w<N>_w<M>_sirna.csv files in *incoming* to openBIS well names.

    E.g. w1_w3_sirna.csv -> A1_A3_sirna.csv. Existing targets are not
    overwritten (the source file is then left in place, as before).
    """
    for csvfile in glob.glob(os.path.join(incoming, 'w*_sirna.csv')):
        (dirName, fileName) = os.path.split(csvfile)
        (basename, extension) = os.path.splitext(fileName)
        token_list = re.split(r"[_]", basename)
        control_well = _control_well_name(token_list[0])
        well = _measure_well_name(token_list[1])
        new_csv = incoming + "/" + control_well + "_" + well + "_sirna.csv"
        if not os.path.exists(new_csv):
            shutil.move(csvfile, new_csv)
# Runs at import time on the dropbox folder passed via sys.argv[1].
get_sirna_wells(incoming)
#The gene csv files are renamed to use the same well names as in openBIS, instead of w1, w2, etc
# Raw triple-well tokens -> openBIS well-name triples (same 7 groups as the
# original if-chain; unknown tokens map to '' as before).
_GENE_WELL_GROUPS = {
    "w4-w5-w6": "A4-A5-A6",
    "w7-w8-w9": "B1-B2-B3",
    "w10-w11-w12": "B4-B5-B6",
    "w13-w14-w15": "C1-C2-C3",
    "w16-w17-w18": "C4-C5-C6",
    "w19-w20-w21": "D1-D2-D3",
    "w22-w23-w24": "D4-D5-D6",
}

def get_gene_wells(incoming):
    """Rename w<N>_w<A>-w<B>-w<C>_gene.csv files to openBIS well names.

    E.g. w1_w4-w5-w6_gene.csv -> A1_A4-A5-A6_gene.csv. Existing targets are
    not overwritten.
    """
    for csvfile in glob.glob(os.path.join(incoming, 'w*_gene.csv')):
        (dirName, fileName) = os.path.split(csvfile)
        (basename, extension) = os.path.splitext(fileName)
        token_list = re.split(r"[_]", basename)
        control_well = {"w1": "A1", "w2": "A2"}.get(token_list[0], "")
        gene_well = _GENE_WELL_GROUPS.get(token_list[1], "")
        gene_csv = incoming + "/" + control_well + "_" + gene_well + "_gene.csv"
        if not os.path.exists(gene_csv):
            shutil.move(csvfile, gene_csv)
# Runs at import time on the dropbox folder passed via sys.argv[1].
get_gene_wells(incoming)
# The plate code is extracted from the file OriginalDataDirectory.txt. This is the plate that contains the images produced by Ludovico and the matlab files given by Fethallah
def extractPlateCode(incoming):
    """Return the first 9 characters of the last PLATE-prefixed line of
    <incoming>/OriginalDataDirectory.txt, or '' when no file / no match.
    """
    plateCode = ''
    for textfile in glob.glob(os.path.join(incoming, 'OriginalDataDirectory.txt')):
        # 'with' closes the handle; the original leaked it.
        with open(textfile, "r") as text:
            for line in text:
                if re.match('PLATE', line):
                    partialCode = re.split(r"[\t]", line)[0]
                    plateCode = partialCode[0:9]
    return plateCode
# NOTE(review): return value is discarded — presumably a leftover smoke test.
extractPlateCode(incoming)
#The file Info_plates_sirna_genes.txt contains info on what genes and siRNA are contained in each well of each plate. If the plate code extracted above is the same as one of the plate codes in the file, the info regarding that plate is extracted
def extractInfoPlates(incoming):
    """Read <incoming>/Info_plates_sirna_genes.txt (tab-separated:
    well / plate / siRNA / gene) and return four parallel lists with the
    rows whose plate matches extractPlateCode(incoming).
    """
    well_list = []
    plate_list = []
    sirna_list = []
    gene_list = []
    # Hoisted: the original re-read OriginalDataDirectory.txt once per line
    # of the info file.
    plateCode = extractPlateCode(incoming).strip()
    for textfile in glob.glob(os.path.join(incoming, 'Info_plates_sirna_genes.txt')):
        with open(textfile, "r") as text:
            for line in text:
                token_list = [item.strip() for item in re.split(r"[\t]", line)]
                # List comprehension instead of filter() so indexing also
                # works on Python 3.
                token_list = [item for item in token_list if len(item) > 0]
                well = token_list[0]
                plate = token_list[1]
                sirna = token_list[2]
                gene = token_list[3]
                if plate == plateCode:
                    well_list.append(well)
                    plate_list.append(plate)
                    sirna_list.append(sirna)
                    gene_list.append(gene)
    return well_list, plate_list, sirna_list, gene_list
# NOTE(review): return value is discarded — presumably a leftover smoke test.
extractInfoPlates(incoming)
#The single sirna csv files are combined into one global_siRNA.csv which contains also info on sirna and genes contained in each well
#The single gene csv files are combined into one global_gene.csv which contains also info on genes contained in each well
def parse_csv(incoming):
# Combine the per-well A*.csv files into global_siRNA.csv / global_gene.csv,
# prepending well / gene / siRNA annotation columns.
# NOTE(review): indentation was lost in this copy of the file; comments below
# describe apparent intent — confirm against the original before editing.
global_sirna_csv = incoming+"/global_siRNA.csv"
global_gene_csv = incoming+"/global_gene.csv"
# Opened in append mode: re-running the dropbox appends duplicate rows.
f = open(global_sirna_csv, "a")
g = open(global_gene_csv, "a")
for csv_file in glob.glob(os.path.join(incoming, 'A*.csv')):
(dirName2, fileName2) = os.path.split(csv_file)
(basename2, extension2) = os.path.splitext(fileName2)
well_list = re.split(r"[_]",basename2)
control = well_list[0]
measure = well_list[1]
meas = measure[0:2]
# Binary mode is the Python 2 / Jython csv convention.
csvfile = open(csv_file, "rb")
test = csv.reader(csvfile, delimiter=',', quotechar='"')
# A3 is the control-vs-control comparison well: annotated with fixed labels.
if (measure == "A3"):
for x, row in enumerate(test):
#if (x==0 and control == "A2"):
# s = "siRNA Well,"+ "Control Well," + "Gene," + "siRNA," + ','.join(row) +'\n'
# f.write(s)
if x !=0:
t = measure + "," + control + "," + "control gene," + "control siRNA" + "," + ",".join(row) +"\n"
f.write(t)
# NOTE(review): extractInfoPlates() is re-run three times here and re-reads
# its files on every call — candidate for hoisting.
for i,j,k in zip(extractInfoPlates(incoming)[0],extractInfoPlates(incoming)[2],extractInfoPlates(incoming)[3]):
if (measure == i):
# NOTE(review): 'test' is a one-shot csv iterator and may already be
# exhausted by an earlier loop over it — confirm this is intended.
for x, row in enumerate(test):
if (x==0 and measure == 'A6' and control == 'A2'):
s = "siRNA Well,"+ "Control Well," + "Gene," + "siRNA," + ','.join(row) +'\n'
f.write(s)
if x !=0:
t = measure + "," + control + "," + k + "," + j + "," + ",".join(row) + "\n"
f.write(t)
# NOTE(review): missing call parentheses — 'f.close' does not close the file.
f.close
for l,m in zip(extractInfoPlates(incoming)[0],extractInfoPlates(incoming)[3]):
if (meas == l):
for y, row in enumerate(test):
print "y, meas, l ", y, meas, control
if (y==0 and meas == "C1" and control == "A1"):
s = "siRNA Well,"+ "Control Well," + "Gene," + ','.join(row) +'\n'
# print s
g.write(s)
if y !=0:
t = measure + "," + control + "," + m + "," + ",".join(row) + "\n"
g.write(t)
# NOTE(review): missing call parentheses — 'g.close' does not close the file.
g.close
parse_csv(incoming)
#! /usr/bin/env python
"""
Import analysis data in two datasets: one dataset for videos and one dataset for matlab files.
"""
from ch.systemsx.cisd.openbis.generic.shared.api.v1.dto import SearchCriteria
from ch.systemsx.cisd.openbis.generic.shared.api.v1.dto import SearchSubCriteria
import os
import glob
import re
import time
import shutil, sys
from time import *
from datetime import *
# Emit a timestamp banner (local-time offset from GMT) into the dropbox log.
print('###################################')
tz = localtime()[3] - gmtime()[3]
d = datetime.now()
print(d.strftime("%Y-%m-%d %H:%M:%S GMT" + "%+.2d" % tz + ":00"))
def copyTextFile(incomingPath):
    """Duplicate OriginalDataDirectory.txt as RawDataDirectory.txt.

    A no-op when the source file is absent.
    """
    pattern = os.path.join(incomingPath, 'OriginalDataDirectory.txt')
    for textfile in glob.glob(pattern):
        shutil.copyfile(textfile, incomingPath + '/RawDataDirectory.txt')
copyTextFile(incoming.getPath())
def extractSpaceCode(incomingPath):
    """Return the fixed openBIS space code; *incomingPath* is ignored."""
    return "SINERGIA"
def extractPlateCode(incomingPath):
    """Return the plate code: the first space-separated token of the last
    PLATE-prefixed line of <incomingPath>/RawDataDirectory.txt.

    Returns '' when there is no file or no PLATE line (the original raised
    NameError in that case).
    """
    plateCode = ''
    for textfile in glob.glob(os.path.join(incomingPath, 'RawDataDirectory.txt')):
        # 'with' closes the handle; the original leaked it.
        with open(textfile, "r") as text:
            for line in text:
                if re.match('PLATE', line):
                    token_list = [item.strip() for item in re.split(r"[ ]", line)]
                    token_list = [item for item in token_list if len(item) > 0]
                    plateCode = token_list[0]
    return plateCode
extractPlateCode(incoming.getPath())
def extractDataSetCode(incomingPath):
    """Return the dataset code parsed from the '/Users...' path line of
    <incomingPath>/RawDataDirectory.txt, or '' when absent.
    """
    dataSetCode = ''
    for textfile in glob.glob(os.path.join(incomingPath, 'RawDataDirectory.txt')):
        # 'with' closes the handle; the original leaked it.
        with open(textfile, "r") as text:
            for line in text:
                # '/Users' prefix = local layout (code at token index 10);
                # the original's commented-out '/raid' variant used index 8.
                if re.match('/Users', line):
                    token_list = [item.strip() for item in re.split(r"[/]", line)]
                    token_list = [item for item in token_list if len(item) > 0]
                    dataSetCode = token_list[10]
    return dataSetCode
extractDataSetCode(incoming.getPath())
def get_videos(incomingPath):
    """Move every .mp4/.webm/.jpg/.html file in *incomingPath* into the
    <incomingPath>/videos subdirectory (created on demand).
    """
    directory = incomingPath + '/videos'
    if not os.path.exists(directory):
        os.makedirs(directory)
    # One loop over extensions replaces four copy-pasted glob loops; the
    # unused 'stage' locals of the original are dropped.
    for pattern in ('*.mp4', '*.webm', '*.jpg', '*.html'):
        for path in glob.glob(os.path.join(incomingPath, pattern)):
            shutil.move(path, directory)
get_videos(incoming.getPath())
def get_matfiles(incomingPath):
    """Move all .mat files plus OriginalDataDirectory.txt into the
    <incomingPath>/matfiles subdirectory (created on demand).
    """
    matDir = incomingPath + '/matfiles'
    if not os.path.exists(matDir):
        os.makedirs(matDir)
    # One loop replaces two copy-pasted glob loops; the unused 'stage'
    # locals of the original are dropped.
    for pattern in ('*.mat', 'OriginalDataDirectory.txt'):
        for path in glob.glob(os.path.join(incomingPath, pattern)):
            shutil.move(path, matDir)
get_matfiles(incoming.getPath())
# openBIS registration. 'service', 'factory' and 'incoming' are not defined in
# this script — presumably globals injected by the dropbox framework (v1 API).
tr = service.transaction(incoming, factory)
incoming = tr.getIncoming()
# Two datasets: one for the videos, one for the matlab files.
data_set = tr.createNewDataSet()
data_set.setDataSetType("HCS_IMAGE_SEGMENTATION_TRACKING_FEATURES")
data_set2 = tr.createNewDataSet()
data_set2.setDataSetType("HCS_ANALYSIS_SEGMENTATION_AND_FEATURES")
# Both datasets are attached to the plate sample /SINERGIA/<plateCode>.
sampleIdentifier = "/"+extractSpaceCode(incoming.getPath())+"/"+extractPlateCode(incoming.getPath())
print sampleIdentifier
plate = tr.getSample(sampleIdentifier)
data_set.setSample(plate)
data_set2.setSample(plate)
# Get the search service
search_service = tr.getSearchService()
sc = SearchCriteria()
# Find the raw-image dataset by code so both new datasets become its children.
sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, extractDataSetCode(incoming.getPath()) ));
foundDataSets = search_service.searchForDataSets(sc)
if foundDataSets.size() > 0:
data_set.setParentDatasets([ds.getDataSetCode() for ds in foundDataSets])
data_set2.setParentDatasets([ds.getDataSetCode() for ds in foundDataSets])
videoPath = incoming.getPath() + '/videos'
tr.moveFile(videoPath, data_set)
matPath = incoming.getPath() + '/matfiles'
tr.moveFile(matPath, data_set2)
\ No newline at end of file
import os, glob, re, csv, time, shutil
from time import *
from datetime import *
#from ch.systemsx.cisd.openbis.dss.etl.dto.api.v2 import *
from ch.systemsx.cisd.openbis.dss.etl.dto.api.v2 import SimpleFeatureVectorDataConfig
#from java.util import Properties
from ch.systemsx.cisd.openbis.generic.shared.api.v1.dto import SearchCriteria
from ch.systemsx.cisd.openbis.generic.shared.api.v1.dto import SearchSubCriteria
from ch.systemsx.cisd.openbis.dss.etl.dto.api.v2 import FeatureListDataConfig
'''
Dropbox for importing a feature vector dataset and for creating feature lists datasets from there.
This dataset is set to be a child of the segmentation dataset produced by Fethallah.
'''
# Emit a timestamp banner (local-time offset from GMT) into the dropbox log.
print('###################################')
tz = localtime()[3] - gmtime()[3]
d = datetime.now()
print(d.strftime("%Y-%m-%d %H:%M:%S GMT" + "%+.2d" % tz + ":00"))
# Module-level accumulators for feature code names, filled by
# parse_sirna_csv / parse_gene_csv and consumed by the feature-list configs.
# One distinct empty list per name (generator expression, not list * 20,
# so the lists are independent objects).
(accuracyA1_sirna_list, accuracyA2_sirna_list,
 KStestA2_sirna_list, KStestA1_sirna_list,
 KSdeltaA2_sirna_list, KSdeltaA1_sirna_list,
 KSpvalueA2_sirna_list, KSpvalueA1_sirna_list,
 feature_directionA2_sirna_list, feature_directionA1_sirna_list,
 accuracyA1_gene_list, accuracyA2_gene_list,
 KStestA2_gene_list, KStestA1_gene_list,
 KSdeltaA2_gene_list, KSdeltaA1_gene_list,
 KSpvalueA2_gene_list, KSpvalueA1_gene_list,
 feature_directionA2_gene_list, feature_directionA1_gene_list) = ([] for _ in range(20))
# Dropbox v2 entry point; the framework calls process(transaction).
# NOTE(review): indentation was lost in this copy — the definitions and calls
# below appear to be nested inside process(); confirm against the original.
def process(transaction):
incoming = transaction.getIncoming()
# def copyTextFile(incoming):
# for textfile in glob.glob(os.path.join(incoming, 'OriginalDataDirectory.txt')):
# rawDataFile = incoming + '/RawDataDirectory.txt'
# shutil.copyfile(textfile, rawDataFile)
#
# copyTextFile(incoming.getPath())
#extract dataset code and plate of original image files from file OriginalDataDirectory.txt
def extractImageDataSetCode(incoming):
    """Return (dataSetCode, plateCode) parsed from OriginalDataDirectory.txt.

    dataSetCode: token index 8 of the '/raid...' path line ('' when absent).
    plateCode: the full PLATE-prefixed line, trailing newline included, as in
    the original ('' when absent).
    """
    dataSetCode = ''
    plateCode = ''
    for textfile in glob.glob(os.path.join(incoming, 'OriginalDataDirectory.txt')):
        # 'with' closes the handle; the original leaked it.
        with open(textfile, "r") as text:
            for line in text:
                # '/raid' = server layout (index 8); the commented-out
                # '/Users' variant in the original used index 10 locally.
                if re.match('/raid', line):
                    token_list = [item.strip() for item in re.split(r"[/]", line)]
                    token_list = [item for item in token_list if len(item) > 0]
                    dataSetCode = token_list[8]
                if re.match('PLATE', line):
                    plateCode = line
    return dataSetCode, plateCode
extractImageDataSetCode(incoming.getPath())
# check if plate code extracted above is the same as one of those in file AnalysisFethallaExample_location.txt. If so, get the dataset code associated with that plate. This is the dataset
# that contains the analysis matlab files produced by Fethallah, which have been used by Riwal to perform his analysis, so the new dataset registered should be a child of Fethallah's dataset.
def extractSegmentationDataSetCode(incoming):
segmentationDataSetCode = ''
segmentationPlateCode = ''
for textfile in glob.glob(os.path.join(incoming, 'FethallahAnalysisOBLocation.txt')):
text = open(textfile, "r")
lineIndex =0
for line in text:
lineIndex=lineIndex+1
token_list = re.split(r"[\t]",line)
token_list = [ item.strip() for item in token_list ]
token_list = filter(lambda x: len(x) > 0, token_list)
segmentationPlateCode = token_list[1]
if (segmentationPlateCode == extractImageDataSetCode(incoming)[1].strip()):
segmentationDataSetCode = token_list[0]
return segmentationDataSetCode
extractSegmentationDataSetCode(incoming.getPath())
def parse_gene_csv(incoming):
    """Append gene feature code names to the module-level *_gene_list
    accumulators, one per data row of every A*gene.csv file in *incoming*.

    Code names follow <feature>_G_<metric>_<reference well>, upper-cased.
    Returns the ten lists (also reachable as module globals).
    """
    for csv_file in glob.glob(os.path.join(incoming, 'A*gene.csv')):
        # Binary mode: this runs under Jython/Python 2 csv.
        csvfile = open(csv_file, "rb")
        try:
            reader = csv.reader(csvfile, delimiter=',', quotechar='"')
            for i, row in enumerate(reader):
                if i != 0:  # skip the header row
                    fnv = row[0]
                    accuracyA2_gene_list.append((fnv + "_G_ac_A2").upper())
                    accuracyA1_gene_list.append((fnv + "_G_ac_A1").upper())
                    KStestA2_gene_list.append((fnv + "_G_KSt_A2").upper())
                    KStestA1_gene_list.append((fnv + "_G_KSt_A1").upper())
                    KSdeltaA2_gene_list.append((fnv + "_G_KSd_A2").upper())
                    KSdeltaA1_gene_list.append((fnv + "_G_KSd_A1").upper())
                    KSpvalueA2_gene_list.append((fnv + "_G_KSp_A2").upper())
                    KSpvalueA1_gene_list.append((fnv + "_G_KSp_A1").upper())
                    feature_directionA2_gene_list.append((fnv + "_G_dir_A2").upper())
                    feature_directionA1_gene_list.append((fnv + "_G_dir_A1").upper())
        finally:
            # The original never closed the handle.
            csvfile.close()
    return accuracyA2_gene_list, accuracyA1_gene_list, KStestA2_gene_list, KStestA1_gene_list, KSdeltaA2_gene_list, KSdeltaA1_gene_list, KSpvalueA2_gene_list, KSpvalueA1_gene_list, feature_directionA2_gene_list, feature_directionA1_gene_list
parse_gene_csv(incoming.getPath())
def parse_sirna_csv(incoming):
    """Append siRNA feature code names to the module-level *_sirna_list
    accumulators, one per data row of every single-well A*sirna.csv file.

    Files whose basename contains '-' (combined-well files) are skipped,
    as in the original. Returns the ten lists.
    """
    for csv_file in glob.glob(os.path.join(incoming, 'A*sirna.csv')):
        basename2 = os.path.splitext(os.path.split(csv_file)[1])[0]
        if re.search("-", basename2):
            continue  # combined-well file: not a single-well siRNA comparison
        # Binary mode: this runs under Jython/Python 2 csv.
        csvfile = open(csv_file, "rb")
        try:
            reader = csv.reader(csvfile, delimiter=',', quotechar='"')
            for i, row in enumerate(reader):
                if i != 0:  # skip the header row
                    fnv = row[0]
                    accuracyA2_sirna_list.append((fnv + "_S_ac_A2").upper())
                    accuracyA1_sirna_list.append((fnv + "_S_ac_A1").upper())
                    KStestA2_sirna_list.append((fnv + "_S_KSt_A2").upper())
                    KStestA1_sirna_list.append((fnv + "_S_KSt_A1").upper())
                    KSdeltaA2_sirna_list.append((fnv + "_S_KSd_A2").upper())
                    KSdeltaA1_sirna_list.append((fnv + "_S_KSd_A1").upper())
                    KSpvalueA2_sirna_list.append((fnv + "_S_KSp_A2").upper())
                    KSpvalueA1_sirna_list.append((fnv + "_S_KSp_A1").upper())
                    feature_directionA2_sirna_list.append((fnv + "_S_dir_A2").upper())
                    feature_directionA1_sirna_list.append((fnv + "_S_dir_A1").upper())
        finally:
            # The original never closed the handle.
            csvfile.close()
    return accuracyA2_sirna_list, accuracyA1_sirna_list, KStestA2_sirna_list, KStestA1_sirna_list, KSdeltaA2_sirna_list, KSdeltaA1_sirna_list, KSpvalueA2_sirna_list, KSpvalueA1_sirna_list, feature_directionA2_sirna_list, feature_directionA1_sirna_list
parse_sirna_csv(incoming.getPath())
# Register the accuracy values from global_gene.csv and global_siRNA.csv as
# openBIS features (one feature per "<label>:<control well>" key). The KS-test /
# delta / p-value / direction variants are present but commented out throughout.
# NOTE(review): the csv handles (csvf, csvf2) are never closed.
def defineGeneFeatures(featuresBuilder, incoming):
for csv_file in glob.glob(os.path.join(incoming, 'global_gene.csv')):
csvf = open(csv_file,'r')
globcsv = csv.reader(csvf, delimiter=',')
# Skip the header row (Python 2 iterator API).
globcsv.next()
result_accuracy = {} # accuracy_label => {measure_well => accuracy_value}
result_kstest ={} # kstest => {measure_well => kstest_value}
result_ksdelta = {} # ksdelta => {measure_well => ksdelta_value}
result_kspvalue ={} # kspvalue => {measure_well => kspvalue_value}
result_feature_direction ={}# feature_direction => {measure_well => feature_direction_value}
for row in globcsv:
measure_well = row[0]
# Gene rows cover a well triple like "A4-A5-A6"; only the first well of the
# group is used as the feature's well coordinate.
group_well = re.split(r"[-]",measure_well)
group_well1 = group_well[0]
control_well = row[1]
feature_name = row[3]
accuracy_label = feature_name + "_G_ac"
accuracy_value = row[4]
kstest = feature_name + "_G_KSt"
kstest_value = row[5]
ksdelta = feature_name + "_G_KSd"
ksdelta_value = row[6]
kspvalue = feature_name + "_G_KSp"
kspvalue_value = row[7]
feature_direction = feature_name + "_G_dir"
feature_direction_value = row[8]
accuracy_key = "%s:%s" %(accuracy_label, control_well)
kstest_key = "%s:%s" %(kstest, control_well)
ksdelta_key = "%s:%s" %(ksdelta, control_well)
kspvalue_key ="%s:%s" %(kspvalue, control_well)
feature_direction_key = "%s:%s" %(feature_direction, control_well)
if not accuracy_key in result_accuracy:
result_accuracy[accuracy_key] = {}
result_accuracy[accuracy_key][group_well1] = accuracy_value
# if not kstest_key in result_kstest:
# result_kstest[kstest_key] = {}
#
# result_kstest[kstest_key][measure_well] = kstest_value
#
#
# if not ksdelta_key in result_ksdelta:
# result_ksdelta[ksdelta_key] = {}
#
# result_ksdelta[ksdelta_key][measure_well] = ksdelta_value
#
# if not kspvalue_key in result_kspvalue:
# result_kspvalue[kspvalue_key] = {}
#
# result_kspvalue[kspvalue_key][measure_well] = kspvalue_value
#
#
# if not feature_direction_key in result_feature_direction:
# result_feature_direction[feature_direction_key] = {}
#
# result_feature_direction[feature_direction_key][measure_well] = feature_direction_value
# Register one openBIS feature per accuracy key, with one value per well.
for feature in result_accuracy:
feature_accuracy = featuresBuilder.defineFeature(feature)
for well in result_accuracy[feature]:
value = result_accuracy[feature][well]
feature_accuracy.addValue(well, value)
# for feature_kst in result_kstest:
# feature_kstest = featuresBuilder.defineFeature(feature_kst)
# for well2 in result_kstest[feature_kst]:
# value2 = result_kstest[feature_kst][well2]
# feature_kstest.addValue(well2, value2)
#
#
# for feature_ksd in result_ksdelta:
# feature_ksdelta = featuresBuilder.defineFeature(feature_ksd)
# for well1 in result_ksdelta[feature_ksd]:
# value1 = result_ksdelta[feature_ksd][well1]
# feature_ksdelta.addValue(well1, value1)
#
# for feature_ksp in result_kspvalue:
# feature_kspvalue = featuresBuilder.defineFeature(feature_ksp)
# for well3 in result_kspvalue[feature_ksp]:
# value3 = result_kspvalue[feature_ksp][well3]
# feature_kspvalue.addValue(well3, value3)
#
# for feature_fd in result_feature_direction:
# feature_feature_direction= featuresBuilder.defineFeature(feature_fd)
# for well4 in result_feature_direction[feature_fd]:
# value4 = result_feature_direction[feature_fd][well4]
# feature_feature_direction.addValue(well4, value4)
# Same processing for the siRNA file; column offsets differ by one because
# global_siRNA.csv carries an extra annotation column.
for csv_file2 in glob.glob(os.path.join(incoming, 'global_siRNA.csv')):
csvf2 = open(csv_file2,'r')
globcsv2 = csv.reader(csvf2, delimiter=',')
# Skip the header row (Python 2 iterator API).
globcsv2.next()
result_accuracy_sirna = {} # accuracy_label => {measure_well => accuracy_value}
result_kstest_sirna ={} # kstest => {measure_well => kstest_value}
result_ksdelta_sirna = {} # ksdelta => {measure_well => ksdelta_value}
result_kspvalue_sirna ={} # kspvalue => {measure_well => kspvalue_value}
result_feature_direction_sirna ={}# feature_direction => {measure_well => feature_direction_value}
for row in globcsv2:
measure_well_sirna = row[0]
control_well_sirna = row[1]
feature_name_sirna = row[4]
accuracy_label_sirna = feature_name_sirna + "_S_ac"
accuracy_value_sirna = row[5]
kstest_sirna = feature_name_sirna + "_S_KSt"
kstest_value_sirna = row[6]
ksdelta_sirna = feature_name_sirna + "_S_KSd"
ksdelta_value_sirna = row[7]
kspvalue_sirna = feature_name_sirna + "_S_KSp"
kspvalue_value_sirna = row[8]
feature_direction_sirna = feature_name_sirna + "_S_dir"
feature_direction_value_sirna = row[9]
accuracy_key_sirna = "%s:%s" %(accuracy_label_sirna, control_well_sirna)
kstest_key_sirna = "%s:%s" %(kstest_sirna, control_well_sirna)
ksdelta_key_sirna = "%s:%s" %(ksdelta_sirna, control_well_sirna)
kspvalue_key_sirna ="%s:%s" %(kspvalue_sirna, control_well_sirna)
feature_direction_key_sirna = "%s:%s" %(feature_direction_sirna, control_well_sirna)
if not accuracy_key_sirna in result_accuracy_sirna:
result_accuracy_sirna[accuracy_key_sirna] = {}
result_accuracy_sirna[accuracy_key_sirna][measure_well_sirna] = accuracy_value_sirna
# if not kstest_key in result_kstest:
# result_kstest[kstest_key] = {}
#
# result_kstest[kstest_key][measure_well] = kstest_value
#
#
# if not ksdelta_key in result_ksdelta:
# result_ksdelta[ksdelta_key] = {}
#
# result_ksdelta[ksdelta_key][measure_well] = ksdelta_value
#
# if not kspvalue_key in result_kspvalue:
# result_kspvalue[kspvalue_key] = {}
#
# result_kspvalue[kspvalue_key][measure_well] = kspvalue_value
#
#
# if not feature_direction_key in result_feature_direction:
# result_feature_direction[feature_direction_key] = {}
#
# result_feature_direction[feature_direction_key][measure_well] = feature_direction_value
# Register one openBIS feature per siRNA accuracy key.
for feature_sirna in result_accuracy_sirna:
feature_accuracy_sirna = featuresBuilder.defineFeature(feature_sirna)
for well_sirna in result_accuracy_sirna[feature_sirna]:
value_sirna = result_accuracy_sirna[feature_sirna][well_sirna]
feature_accuracy_sirna.addValue(well_sirna, value_sirna)
# for feature_kst in result_kstest:
# feature_kstest = featuresBuilder.defineFeature(feature_kst)
# for well2 in result_kstest[feature_kst]:
# value2 = result_kstest[feature_kst][well2]
# feature_kstest.addValue(well2, value2)
#
#
# for feature_ksd in result_ksdelta:
# feature_ksdelta = featuresBuilder.defineFeature(feature_ksd)
# for well1 in result_ksdelta[feature_ksd]:
# value1 = result_ksdelta[feature_ksd][well1]
# feature_ksdelta.addValue(well1, value1)
#
# for feature_ksp in result_kspvalue:
# feature_kspvalue = featuresBuilder.defineFeature(feature_ksp)
# for well3 in result_kspvalue[feature_ksp]:
# value3 = result_kspvalue[feature_ksp][well3]
# feature_kspvalue.addValue(well3, value3)
#
# for feature_fd in result_feature_direction:
# feature_feature_direction= featuresBuilder.defineFeature(feature_fd)
# for well4 in result_feature_direction[feature_fd]:
# value4 = result_feature_direction[feature_fd][well4]
# feature_feature_direction.addValue(well4, value4)
# Build the feature-vector dataset and register it as a child of the
# segmentation dataset produced by Fethallah.
config = SimpleFeatureVectorDataConfig()
featuresBuilder = config.featuresBuilder
defineGeneFeatures(featuresBuilder, incoming.getPath())
analysisDataset = transaction.createNewFeatureVectorDataSet(config, incoming)
# Attach the new dataset to the same plate sample as the segmentation dataset.
rawImagesDataSetSample1 = transaction.getDataSet(extractSegmentationDataSetCode(incoming.getPath())).getSample()
rawImagesDataSetSample = transaction.getSample('/SINERGIA/' + rawImagesDataSetSample1.getCode())
# plateIdentifier = "/SINERGIA/PLATE1-G1-10X"
# test = transaction.getSample("/SINERGIA/PLATE1-G1-10X")
# analysisDataset.setSample(test)
analysisDataset.setSample(rawImagesDataSetSample)
search_service = transaction.getSearchService()
sc = SearchCriteria()
# Find the segmentation dataset by code and make it the parent.
sc.addMatchClause(SearchCriteria.MatchClause.createAttributeMatch(SearchCriteria.MatchClauseAttribute.CODE, extractSegmentationDataSetCode(incoming.getPath()) ));
foundDataSets = search_service.searchForDataSets(sc)
if foundDataSets.size() > 0:
analysisDataset.setParentDatasets([ds.getDataSetCode() for ds in foundDataSets])
# store the original file in the dataset.
transaction.moveFile(incoming.getPath(), analysisDataset)
######################## Create Feature lists Datasets ###########################################
# Only the siRNA accuracy (reference well A2) list is currently registered;
# all other feature lists below are commented out.
config_accA2 = FeatureListDataConfig()
config_accA2.setName("siRNA-based accuracy (reference well: A2)");
config_accA2.setFeatureList(accuracyA2_sirna_list)
config_accA2.setContainerDataSet(analysisDataset)
transaction.createNewFeatureListDataSet(config_accA2)
# config_accA1 = FeatureListDataConfig()
# config_accA1.setName("siRNA-based accuracy (reference well: A1)");
# config_accA1.setFeatureList(accuracyA1_sirna_list)
# config_accA1.setContainerDataSet(analysisDataset)
# transaction.createNewFeatureListDataSet(config_accA1)
#
# config_gene_accA2 = FeatureListDataConfig()
# config_gene_accA2.setName("gene-based accuracy (reference well: A2)");
# config_gene_accA2.setFeatureList(accuracyA2_gene_list)
# config_gene_accA2.setContainerDataSet(analysisDataset)
# transaction.createNewFeatureListDataSet(config_gene_accA2)
#
# config_gene_accA1 = FeatureListDataConfig()
# config_gene_accA1.setName("gene-based accuracy (reference well: A1)");
# config_gene_accA1.setFeatureList(accuracyA1_gene_list)
# config_gene_accA1.setContainerDataSet(analysisDataset)
# transaction.createNewFeatureListDataSet(config_gene_accA1)
#
#
#
# config_KStestA2 = FeatureListDataConfig()
# config_KStestA2.setName("KStest (reference well: A2)");
# config_KStestA2.setFeatureList(KStestA2_sirna_list)
# config_KStestA2.setContainerDataSet(analysisDataset)
# transaction.createNewFeatureListDataSet(config_KStestA2)
#
# config_KStestA1 = FeatureListDataConfig()
# config_KStestA1.setName("KStest (reference well: A1)");
# config_KStestA1.setFeatureList(KStestA1_sirna_list)
# config_KStestA1.setContainerDataSet(analysisDataset)
# transaction.createNewFeatureListDataSet(config_KStestA1)
#
#
# config_KSdeltaA2 = FeatureListDataConfig()
# config_KSdeltaA2.setName("KSdelta (reference well: A2)");
# config_KSdeltaA2.setFeatureList(KSdeltaA2_sirna_list)
# config_KSdeltaA2.setContainerDataSet(analysisDataset)
# transaction.createNewFeatureListDataSet(config_KSdeltaA2)
#
# config_KSdeltaA1 = FeatureListDataConfig()
# config_KSdeltaA1.setName("KSdelta (reference well: A1)");
# config_KSdeltaA1.setFeatureList(accuracyA1_sirna_list)
# config_KSdeltaA1.setContainerDataSet(analysisDataset)
# transaction.createNewFeatureListDataSet(config_KSdeltaA1)
#
#
# config_KSpvalueA2 = FeatureListDataConfig()
# config_KSpvalueA2.setName("KSpvalue (reference well: A2)");
# config_KSpvalueA2.setFeatureList(KSpvalueA2_sirna_list)
# config_KSpvalueA2.setContainerDataSet(analysisDataset)
# transaction.createNewFeatureListDataSet(config_KSpvalueA2)
#
# config_KSpvalueA1 = FeatureListDataConfig()
# config_KSpvalueA1.setName("KSpvalue (reference well: A1)");
# config_KSpvalueA1.setFeatureList(KSpvalueA1_sirna_list)
# config_KSpvalueA1.setContainerDataSet(analysisDataset)
# transaction.createNewFeatureListDataSet(config_KSpvalueA1)
#
# config_feature_directionA2 = FeatureListDataConfig()
# config_feature_directionA2.setName("Direction (reference well: A2)");
# config_feature_directionA2.setFeatureList(feature_directionA2_sirna_list)
# config_feature_directionA2.setContainerDataSet(analysisDataset)
# transaction.createNewFeatureListDataSet(config_feature_directionA2)
#
# config_feature_directionA1 = FeatureListDataConfig()
# config_feature_directionA1.setName("Direction (reference well: A1)");
# config_feature_directionA1.setFeatureList(feature_directionA1_sirna_list)
# config_feature_directionA1.setContainerDataSet(analysisDataset)
# transaction.createNewFeatureListDataSet(config_feature_directionA1)
#! /usr/bin/env python
"""
The implementation of the Sinergia dropbox.
Sinergia data is uploaded in a format where many files are provided in a single folder containing images and a metadata file (with the ".nd" extension). The dropbox implementation takes this format, extracts metadata and converts the file structure to a different one that is more manageable. The resulting file structure contains directories for each well, containing directories for each channel. The images are located inside the channel directory.
"""
import os
import glob
import re
import time
import shutil
from ch.systemsx.cisd.openbis.dss.etl.dto.api.v1 import SimpleImageDataConfig
from ch.systemsx.cisd.openbis.dss.etl.dto.api.v1 import ImageMetadata
from ch.systemsx.cisd.hdf5.h5ar import HDF5ArchiverFactory
from java.io import File
# Subfolder (inside the incoming dir) where original thumbnail files are parked.
ORIGINAL_THUMBNAILS_FOLDER="thumbnails-original"
# Name of the folder (created two levels above the incoming dir, see the
# "../../" paths below) that collects files from incomplete experiments.
Uncomplete_Experiments = "Uncomplete_Experiments"
class SinergiaPlate:
    """Container for the stage positions found on one plate."""

    def __init__(self):
        # One SinergiaStage per microscope stage position.
        self.stages = []
class SinergiaStage:
    """One stage position; well/site/tile are filled in by parse_plate_metadata."""

    def __init__(self):
        # SinergiaChannel instances recorded at this position.
        self.channels = []
        # openBIS well name, e.g. "A1" (set later from the .nd metadata).
        self.well = ""
        # dummy value generated underneath
        self.tileNumber = 0
        self.siteString = 0
class SinergiaChannel:
    """One imaging channel of a stage and its time-point image files."""

    def __init__(self):
        # Channel name, e.g. "red"/"green" (initialised to [] as the
        # original did; process_stage overwrites it with a string).
        self.name = []
        # Image file paths for this channel, one per time point.
        self.timepoints = []
def move_file_to_dir(srcFile, destDir):
    """Move *srcFile* into *destDir* via os.rename.

    Moving a file onto itself is a silent no-op; an existing different
    target raises.
    """
    target = destDir + "/" + os.path.basename(srcFile)
    if srcFile != target:
        if os.path.exists(target):
            raise Exception(
                "Cowardly refusing to override existing file %s with source file %s." % (target, srcFile))
        os.rename(srcFile, target)
def process_stage(plate, files):
    """Process all the files for one stage of a plate, returning the stage.

    *plate* is accepted for interface compatibility but unused, as in the
    original.
    """
    stage = SinergiaStage()
    # One channel per colour; replaces two copy-pasted red/green blocks.
    for colour in ("red", "green"):
        channel = SinergiaChannel()
        channel.name = colour
        # All files belonging to this channel (name contains e.g. 'red_').
        channel.timepoints = [f for f in files if re.match('.*' + colour + '_.*', f)]
        stage.channels.append(channel)
    return stage
def parse_plate_metadata(incomingPath, pattern_start, sinergia_plate):
    """Fill well/site/tile info on sinergia_plate.stages from the .nd file.

    Each '"Stage<N>"' line assigns well and site to stage N-1 and a dummy
    tile number cycling 1..10.
    """
    dummyTileCounter = 0
    for ndFileName in glob.glob(os.path.join(incomingPath, pattern_start + '.nd')):
        lineIndex = 0
        with open(ndFileName, "r") as ndfile:
            for line in ndfile:
                lineIndex += 1
                if not re.match('"Stage', line):
                    continue
                # Split on quotes/colons/backslashes/commas/newline, drop empties.
                tokens = [item.strip() for item in re.split(r"[\"\:\\\n\,]", line)]
                tokens = [item for item in tokens if len(item) > 0]
                stageString = tokens[0]
                try:
                    stageIdx = int(stageString[len("Stage"):]) - 1
                except ValueError:
                    raise Exception("Cannot parse stage number from '%(stageString)s: %(ndFileName)s, line %(lineIndex)i'" % vars())
                try:
                    stage = sinergia_plate.stages[stageIdx]
                except IndexError:
                    raise Exception("Invalid stage number '%(stageString)s: %(ndFileName)s, line %(lineIndex)i'. No corresponding TIF file was found." % vars())
                stage.well = tokens[1]
                stage.siteString = tokens[2]
                stage.tileNumber = (dummyTileCounter % 10) + 1
                dummyTileCounter += 1
def move_to_original_thumbnail_folder(incomingPath, thumbFiles):
    """Move every thumbnail file into the original-thumbnails subfolder of the
    incoming directory, creating that folder on first use."""
    destination = incomingPath + "/" + ORIGINAL_THUMBNAILS_FOLDER
    if not os.path.exists(destination):
        os.makedirs(destination)
    for path in thumbFiles:
        move_file_to_dir(path, destination)
def create_glob_pattern_start(incomingPath):
    """
    Return the Experiment{*} part of a pattern used by functions that need
    to process only the valid experiment.

    An experiment is considered complete when at least 46560 TIF files match
    its .nd file name (threshold was 29000 in an earlier revision — presumably
    the expected file count per full timelapse; confirm against acquisition
    settings). Incomplete experiments' .nd files are moved aside into the
    Uncomplete_Experiments folder two levels up.

    Returns None when no complete experiment is found. (Fix: the original
    returned an unbound local, raising UnboundLocalError in that case.)
    """
    ret = None
    for ndfile in glob.glob(os.path.join(incomingPath, '*.nd')):
        # Fix: do not shadow the incomingPath parameter inside the loop.
        (ndDir, ndName) = os.path.split(ndfile)
        (filename, extension) = os.path.splitext(ndName)
        pattern = filename + '*.TIF'
        match_count = len(glob.glob1(ndDir, pattern))
        if match_count < 46560:
            unwanted = ndDir + "/../../" + Uncomplete_Experiments
            if not os.path.exists(unwanted):
                os.makedirs(unwanted)
            shutil.move(ndfile, unwanted)
        else:
            ret = filename
    return ret
def is_thumbnail(fileName):
    """Return True when fileName carries the Metamorph "_thumb_" marker."""
    return fileName.find("_thumb_") >= 0
def remove_uncomplete_experiments(incomingPath):
    # Discard every TIF that does not belong to the one complete experiment
    # selected by create_glob_pattern_start (which itself moves incomplete
    # .nd files aside as a side effect).
    pattern_start = create_glob_pattern_start(incomingPath)
    for tif in glob.glob(os.path.join(incomingPath, '*.TIF')):
        (incomingPath, file) = os.path.split(tif)
        (filename, extension) = os.path.splitext(file)
        token_list = re.split('_', file)
        for token in token_list:
            if re.match('Exp', token):
                # e.g. "Experiment01"; NOTE(review): Experiment stays unbound
                # (NameError below) if no token starts with "Exp" — confirm
                # the file naming guarantees such a token.
                Experiment = token
        if Experiment != pattern_start:
            unwanted = incomingPath+"/../../" + Uncomplete_Experiments
            if not os.path.exists(unwanted):
                os.makedirs(unwanted)
            shutil.move(tif, unwanted)
            # NOTE(review): the folder is deleted immediately after the move,
            # so the stray TIF is effectively thrown away rather than kept in
            # Uncomplete_Experiments — confirm this is intended.
            shutil.rmtree(unwanted)
# Executed at import time by the dropbox framework: purge stray TIFs before
# any registration work starts. `incoming` is supplied by openBIS.
# NOTE(review): process_plate() calls this again — presumably harmless
# (second pass finds nothing), but confirm the duplication is intended.
remove_uncomplete_experiments(incoming.getPath())
def process_plate(incomingPath):
    """Look at all the files in the incoming path and group them into plates.

    Builds a SinergiaPlate with one stage per stage number 1..240, moving
    thumbnails aside and recording the experiment name pattern on the plate.
    """
    plate = SinergiaPlate()
    remove_uncomplete_experiments(incomingPath)
    patternStart = None
    for stageNumber in range(1, 241):
        # recomputed every pass: create_glob_pattern_start also moves
        # incomplete experiments aside as a side effect
        patternStart = create_glob_pattern_start(incomingPath)
        plate.pattern_start = patternStart
        stageGlob = patternStart + "*_s" + str(stageNumber) + '_*.TIF'
        stageFiles = glob.glob(os.path.join(incomingPath, stageGlob))
        thumbs = [path for path in stageFiles if is_thumbnail(path)]
        images = [path for path in stageFiles if not is_thumbnail(path)]
        stage = process_stage(plate, images)
        move_to_original_thumbnail_folder(incomingPath, thumbs)
        plate.stages.append(stage)
    parse_plate_metadata(incomingPath, patternStart, plate)
    return plate
def get_directory_for_image_file(stageIdx, channelName):
    """Return the <incoming>/<stageIdx>/<channelName> folder, creating it on
    demand. Uses the module-level `incoming` dropbox object."""
    targetDir = "/".join([incoming.getPath(), str(stageIdx), channelName])
    if not os.path.exists(targetDir):
        os.makedirs(targetDir)
    return targetDir
def transform_plate_file_structure(plate):
    """Relocate every image of the plate into a <stageNumber>/<channelName>
    subfolder (stage numbers are 1-based)."""
    stageNumber = 0
    for stage in plate.stages:
        stageNumber = stageNumber + 1
        for channel in stage.channels:
            for imagePath in channel.timepoints:
                targetDir = get_directory_for_image_file(stageNumber, channel.name)
                move_file_to_dir(imagePath, targetDir)
class SinergiaImageDataSetConfig(SimpleImageDataConfig):
    """Image dataset configuration mapping Metamorph file names onto openBIS
    wells, tiles, channels and timepoints using the parsed plate metadata."""

    def __init__(self, sinergia_plate):
        # the SinergiaPlate built by process_plate / parse_plate_metadata
        self.sinergia_plate = sinergia_plate

    def extractImageMetadata(self, imagePath):
        """Return ImageMetadata for one image file, or None for thumbnails and
        files that do not belong to the registered experiment."""
        (dirName, filename) = os.path.split(imagePath)
        (basename, extension) = os.path.splitext(filename)
        if is_thumbnail(basename):
            return None
        if not basename.startswith(self.sinergia_plate.pattern_start):
            return None
        image_tokens = ImageMetadata()
        # File name tokens like "w<channel>", "s<stage>", "t<timepoint>":
        # index them by their first letter.
        token_dict = {}
        for token in basename.split("_"):
            token_dict[token[:1]] = token[1:]
        channelName = token_dict["w"]
        if "1LED green" == channelName:
            channelCode = "LIFE ACT-GFP"
        elif "2LED red" == channelName:
            channelCode = "NLS-mCHERRY"
        else:
            channelCode = channelName
        image_tokens.channelCode = channelCode
        stageIdx = int(token_dict["s"]) - 1
        # Fix: use the plate stored on this instance rather than the
        # module-level `sinergia_plate` global the original relied on.
        stage = self.sinergia_plate.stages[stageIdx]
        image_tokens.well = stage.well
        image_tokens.tileNumber = stage.tileNumber
        image_tokens.timepoint = int(token_dict["t"])
        return image_tokens
def get_or_create_bis_plate(tr, plateName,incomingPath):
    # Fetch the openBIS PLATE sample "/SINERGIA/<plateName>-10x"; on first
    # registration create it (and its group experiment, if missing) inside
    # the transaction `tr`.
    spaceCode = "SINERGIA"
    #plateIdentifier = "/" + spaceCode + "/" + plateName + "-REP"
    plateIdentifier = "/" + spaceCode + "/" + plateName + "-10x"
    plate = tr.getSample(plateIdentifier)
    (pathName, dirName) = os.path.split(incomingPath)
    if not plate:
        # The incoming folder name is "-"-separated and presumably contains a
        # "G<number>" token that selects the group experiment.
        token_list = re.split('-', dirName)
        for token in token_list:
            if re.match('G', token):
                # NOTE(review): groupNum stays unbound (NameError below) if no
                # token starts with "G" — confirm the folder naming guarantee.
                groupNum=token[1:]
        groupIdentifier = "/" + spaceCode + "/SIRNA_TIMELAPSES_10X/GROUP-" + groupNum
        plate = tr.createNewSample(plateIdentifier, 'PLATE')
        plate.setPropertyValue("$PLATE_GEOMETRY", "24_WELLS_4X6")
        exp = tr.getExperiment(groupIdentifier)
        if not exp:
            exp = tr.createNewExperiment(groupIdentifier, 'SIRNA_HCS')
            exp.setPropertyValue("DESCRIPTION", "siRNA screening: timelapses")
        #exp = tr.getExperiment("/SINERGIA/SIRNA_MOVIES/GROUP-1")
        plate.setExperiment(exp)
    return plate;
def archive_thumbnails(incomingPath):
    """Pack the thumbnails-original folder into an HDF5 container
    (thumbnails-original.h5ar) and delete the folder afterwards.

    No-op when the folder does not exist. Uses the openBIS
    HDF5ArchiverFactory and a java.io.File handle.
    """
    thumbnailDir = os.path.join(incomingPath, "thumbnails-original")
    thumbnailContainer = os.path.join(incomingPath, "thumbnails-original.h5ar")
    if os.path.isdir(thumbnailDir):
        archiver = HDF5ArchiverFactory.open(thumbnailContainer)
        try:
            archiver.archiveFromFilesystem(File(thumbnailDir))
        finally:
            # Fix: release the archiver handle even when archiving fails
            # (it used to leak on exception).
            archiver.close()
        shutil.rmtree(thumbnailDir)
# ---- dropbox entry point: executed once per incoming folder ----
# `incoming`, `service` and `factory` are injected by the openBIS framework.
# Group the incoming TIFs into a plate model and rearrange them into
# <stage>/<channel>/ folders for registration.
sinergia_plate = process_plate(incoming.getPath())
transform_plate_file_structure(sinergia_plate)
tr = service.transaction(incoming, factory)
if incoming.isDirectory():
    archive_thumbnails(incoming.getPath())
# Configure the raw image dataset (ImageJ library, 64x52 representations).
imageDatasetConfig = SinergiaImageDataSetConfig(sinergia_plate)
imageDatasetConfig.setImageLibrary("IJ")
imageDatasetConfig.setRawImageDatasetType()
imageDatasetConfig.setGenerateImageRepresentationsUsingImageResolutions(["64x52"])
imageDataSetDetails = factory.createImageRegistrationDetails(imageDatasetConfig, incoming)
# Attach the new dataset to its plate sample and hand the files to openBIS.
plate = get_or_create_bis_plate(tr, incoming.getName().upper(), incoming.getPath())
dataSet = tr.createNewDataSet(imageDataSetDetails)
dataSet.setSample(plate)
tr.moveFile(incoming.getPath(), dataSet)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment