diff --git a/eu_basynthec/dist/etc/growth-profiles/data-set-handler.py b/eu_basynthec/dist/etc/growth-profiles/data-set-handler.py index 425699d6a9b090120fac7e0bdcb79ffe39cecbd6..e8462a5d0c87efb745763dcc6e02736a992d2c7e 100644 --- a/eu_basynthec/dist/etc/growth-profiles/data-set-handler.py +++ b/eu_basynthec/dist/etc/growth-profiles/data-set-handler.py @@ -9,10 +9,19 @@ def retrieve_experiment(tr, exp_id): exp = tr.getExperiment(exp_id) return exp +def extract_strains(): + """Extract the strains from the data sheet""" + strains = [] + lines = timeSeriesData.getRawDataLines() + for i in range(1, len(lines)): + line = lines[i] + strains.append(line[0]) + return ",".join(strains) + def assign_properties(dataset, metadata): """Assign properties to the data set from information in the data.""" propertyNameMap = { - "STRAIN":"STRAIN", + "STRAIN_NAMES": "STRAIN_NAMES", "TIMEPOINT TYPE": "TIMEPOINT_TYPE", "CELL LOCATION": "CELL_LOCATION", "VALUE TYPE": "VALUE_TYPE", @@ -53,6 +62,8 @@ timeSeriesData = TimeSeriesDataExcel.createTimeSeriesDataExcel(incoming.getAbsol # create the data set and assign the metadata from the file dataset = tr.createNewDataSet("OD600") metadata = timeSeriesData.getMetadataMap() +# Strains are not in the metadata, but in the data, so extract them +metadata["STRAIN_NAMES"] = extract_strains() assign_properties(dataset, metadata) # Convert the data into a tsv file, and put that and the original data into the data set diff --git a/eu_basynthec/dist/etc/growth-profiles/data-set-validator.py b/eu_basynthec/dist/etc/growth-profiles/data-set-validator.py index c09a86937a65acebfb74653345354dad5a569e7c..c3c470a0a616bf5b86e76f5734e55ae9cb16e724 100644 --- a/eu_basynthec/dist/etc/growth-profiles/data-set-validator.py +++ b/eu_basynthec/dist/etc/growth-profiles/data-set-validator.py @@ -1,120 +1,25 @@ -import os -import re -import sys -import java.io.File -from java.io import IOException -from java.lang import IllegalArgumentException -from 
ch.systemsx.cisd.openbis.dss.generic.shared.api.v1.validation import ValidationError, ValidationScriptRunner -from ch.systemsx.cisd.openbis.dss.generic.shared.utils import ExcelFileReader -from ch.systemsx.cisd.common.logging import LogFactory, LogCategory - -operationLog = LogFactory.getLogger(LogCategory.OPERATION, ValidationScriptRunner) -OPENBIS_METADATA_SHEET_NAME = "openbis-metadata" -OPENBIS_DATA_SHEET_NAME = "openbis-data" - -class TimeSeriesDataExcel: - """ - An abstraction for accessing time series data following the BaSynthec conventions - from an Excel file. This class ported from Java, thus the camelCase naming. - """ - def __init__(self, file, fileReader): - self.file = file - self.fileReader = fileReader - - def getRawMetadataLines(self): - """Get the raw lines of the metadata sheet.""" - try: - return self.fileReader.readLines(OPENBIS_METADATA_SHEET_NAME); - except IOException, ex: - operationLog.error("Could not read data from [file: " + self.file.getPath() + ", sheet: " - + OPENBIS_METADATA_SHEET_NAME + "]", ex) - return [] - - def getRawDataLines(self): - """Get the raw lines of the data sheet.""" - try: - return self.fileReader.readLines(OPENBIS_DATA_SHEET_NAME) - except IOException, ex: - operationLog.error("Could not read data from [file: " + file.getPath() + ", sheet: " - + OPENBIS_DATA_SHEET_NAME + "]", ex) - return [] - - def getMetadataMap(self): - """ - Return the metadata has a hashmap, with all keys uppercased. - - Assumes the metadata sheet corresponds to the following format: [Property] [Value] [... stuff - that can be ignored], that is the property name is in column 1 and property value is in - column 2, and everything else can be ignored. 
- """ - metadataMap = {} - metadataLines = self.getRawMetadataLines() - - # Skip the first line, this is just the header - for i in range(1, metadataLines.size()): - line = metadataLines.get(i) - value = line[1]; - if "BLANK" == value: - value = None - metadataMap[line[0].upper()] = value - return metadataMap - -def create_time_series_excel(fileName): - """Factory method for the TimeSeriesData object. Returns None if it cannot be created""" - file = java.io.File(fileName) - try: - workbook = ExcelFileReader.getExcelWorkbook(file) - fileReader = ExcelFileReader(workbook, True) - return TimeSeriesDataExcel(file, fileReader) - except IllegalArgumentException, ex: - operationLog.error("Could not open file [" + fileName + "] as Excel data.", ex) - except IOException, ex: - operationLog.error("Could not open file [" + fileName + "] as Excel data.", ex) - return None - - - -class ValidationHelper: - """ - Methods for simplifying validation in BaSynthec. - This class ported from Java, thus the camelCase naming. 
- """ - def __init__(self, metadataMap, errors): - self.metadataMap = metadataMap - self.errors = errors - - def checkIsSpecified(self, property, displayName): - if self.metadataMap.get(property) is None: - self.errors.append(ValidationError.createFileValidationError("A " + displayName - + " must be specified.")) - return False - return True - def validate_data(timeSeriesData, errors): dataLines = timeSeriesData.getRawDataLines() lineCount = 0 for line in dataLines: # The header needs to be Abs if lineCount is 0: - if line[0] != "Abs": - errors.append(createFileValidationError("The first data column must be 'Abs'")) + if line[0] != "Strain": + errors.append(createFileValidationError("The first data column must be 'Strain'")) break lineCount = lineCount + 1 continue # The compound id should be one of these forms - od600 = line[0] - if od600 != "OD600": - errors.append(createFileValidationError("Line " + str(lineCount + 1) + ", column 1 must be OD600 (instead of " + od600 + ").")) + strain = line[0] + if not isStrainIdValid(strain): + errors.append(createFileValidationError("Line " + str(lineCount + 1) + ", column 1 must be MGP[0-999] (instead of " + strain + ").")) lineCount = lineCount + 1 def validate_metadata(time_series_data, errors): metadata = time_series_data.getMetadataMap() validationHelper = ValidationHelper(metadata, errors) - # validate the strain - validationHelper.checkIsSpecified("STRAIN", "strain") - # validate the timepoint type if validationHelper.checkIsSpecified("TIMEPOINT TYPE", "time point type"): if metadata.get("TIMEPOINT TYPE").upper() not in ['EX', 'IN', 'SI']: diff --git a/eu_basynthec/dist/etc/metabolomics/data-set-validator.py b/eu_basynthec/dist/etc/metabolomics/data-set-validator.py index e6e549068a48c1f997516173276b87809cdd84fc..d6ef4b4cb27397593927016ee35a575f189a445c 100644 --- a/eu_basynthec/dist/etc/metabolomics/data-set-validator.py +++ b/eu_basynthec/dist/etc/metabolomics/data-set-validator.py @@ -1,95 +1,3 @@ -import os -import 
re -import sys -import java.io.File -from java.io import IOException -from java.lang import IllegalArgumentException -from ch.systemsx.cisd.openbis.dss.generic.shared.api.v1.validation import ValidationError, ValidationScriptRunner -from ch.systemsx.cisd.openbis.dss.generic.shared.utils import ExcelFileReader -from ch.systemsx.cisd.common.logging import LogFactory, LogCategory - -operationLog = LogFactory.getLogger(LogCategory.OPERATION, ValidationScriptRunner) -OPENBIS_METADATA_SHEET_NAME = "openbis-metadata" -OPENBIS_DATA_SHEET_NAME = "openbis-data" - -class TimeSeriesDataExcel: - """ - An abstraction for accessing time series data following the BaSynthec conventions - from an Excel file. This class ported from Java, thus the camelCase naming. - """ - def __init__(self, file, fileReader): - self.file = file - self.fileReader = fileReader - - def getRawMetadataLines(self): - """Get the raw lines of the metadata sheet.""" - try: - return self.fileReader.readLines(OPENBIS_METADATA_SHEET_NAME); - except IOException, ex: - operationLog.error("Could not read data from [file: " + self.file.getPath() + ", sheet: " - + OPENBIS_METADATA_SHEET_NAME + "]", ex) - return [] - - def getRawDataLines(self): - """Get the raw lines of the data sheet.""" - try: - return self.fileReader.readLines(OPENBIS_DATA_SHEET_NAME) - except IOException, ex: - operationLog.error("Could not read data from [file: " + file.getPath() + ", sheet: " - + OPENBIS_DATA_SHEET_NAME + "]", ex) - return [] - - def getMetadataMap(self): - """ - Return the metadata has a hashmap, with all keys uppercased. - - Assumes the metadata sheet corresponds to the following format: [Property] [Value] [... stuff - that can be ignored], that is the property name is in column 1 and property value is in - column 2, and everything else can be ignored. 
- """ - metadataMap = {} - metadataLines = self.getRawMetadataLines() - - # Skip the first line, this is just the header - for i in range(1, metadataLines.size()): - line = metadataLines.get(i) - value = line[1]; - if "BLANK" == value: - value = None - metadataMap[line[0].upper()] = value - return metadataMap - -def create_time_series_excel(fileName): - """Factory method for the TimeSeriesData object. Returns None if it cannot be created""" - file = java.io.File(fileName) - try: - workbook = ExcelFileReader.getExcelWorkbook(file) - fileReader = ExcelFileReader(workbook, True) - return TimeSeriesDataExcel(file, fileReader) - except IllegalArgumentException, ex: - operationLog.error("Could not open file [" + fileName + "] as Excel data.", ex) - except IOException, ex: - operationLog.error("Could not open file [" + fileName + "] as Excel data.", ex) - return None - - - -class ValidationHelper: - """ - Methods for simplifying validation in BaSynthec. - This class ported from Java, thus the camelCase naming. 
- """ - def __init__(self, metadataMap, errors): - self.metadataMap = metadataMap - self.errors = errors - - def checkIsSpecified(self, property, displayName): - if self.metadataMap.get(property) is None: - self.errors.append(ValidationError.createFileValidationError("A " + displayName - + " must be specified.")) - return False - return True - def validate_data(time_series_data, errors): chebiRegex = re.compile("^CHEBI:[0-9]+") diff --git a/eu_basynthec/dist/etc/proteomics/data-set-validator.py b/eu_basynthec/dist/etc/proteomics/data-set-validator.py index c1150d736ebceb5234e38ff17f84daa1ba3a9d74..3b4b73e074b380653e688e811ad10db8e2dfdfa8 100644 --- a/eu_basynthec/dist/etc/proteomics/data-set-validator.py +++ b/eu_basynthec/dist/etc/proteomics/data-set-validator.py @@ -1,95 +1,3 @@ -import os -import re -import sys -import java.io.File -from java.io import IOException -from java.lang import IllegalArgumentException -from ch.systemsx.cisd.openbis.dss.generic.shared.api.v1.validation import ValidationError, ValidationScriptRunner -from ch.systemsx.cisd.openbis.dss.generic.shared.utils import ExcelFileReader -from ch.systemsx.cisd.common.logging import LogFactory, LogCategory - -operationLog = LogFactory.getLogger(LogCategory.OPERATION, ValidationScriptRunner) -OPENBIS_METADATA_SHEET_NAME = "openbis-metadata" -OPENBIS_DATA_SHEET_NAME = "openbis-data" - -class TimeSeriesDataExcel: - """ - An abstraction for accessing time series data following the BaSynthec conventions - from an Excel file. This class ported from Java, thus the camelCase naming. 
- """ - def __init__(self, file, fileReader): - self.file = file - self.fileReader = fileReader - - def getRawMetadataLines(self): - """Get the raw lines of the metadata sheet.""" - try: - return self.fileReader.readLines(OPENBIS_METADATA_SHEET_NAME); - except IOException, ex: - operationLog.error("Could not read data from [file: " + self.file.getPath() + ", sheet: " - + OPENBIS_METADATA_SHEET_NAME + "]", ex) - return [] - - def getRawDataLines(self): - """Get the raw lines of the data sheet.""" - try: - return self.fileReader.readLines(OPENBIS_DATA_SHEET_NAME) - except IOException, ex: - operationLog.error("Could not read data from [file: " + file.getPath() + ", sheet: " - + OPENBIS_DATA_SHEET_NAME + "]", ex) - return [] - - def getMetadataMap(self): - """ - Return the metadata has a hashmap, with all keys uppercased. - - Assumes the metadata sheet corresponds to the following format: [Property] [Value] [... stuff - that can be ignored], that is the property name is in column 1 and property value is in - column 2, and everything else can be ignored. - """ - metadataMap = {} - metadataLines = self.getRawMetadataLines() - - # Skip the first line, this is just the header - for i in range(1, metadataLines.size()): - line = metadataLines.get(i) - value = line[1]; - if "BLANK" == value: - value = None - metadataMap[line[0].upper()] = value - return metadataMap - -def create_time_series_excel(fileName): - """Factory method for the TimeSeriesData object. 
Returns None if it cannot be created""" - file = java.io.File(fileName) - try: - workbook = ExcelFileReader.getExcelWorkbook(file) - fileReader = ExcelFileReader(workbook, True) - return TimeSeriesDataExcel(file, fileReader) - except IllegalArgumentException, ex: - operationLog.error("Could not open file [" + fileName + "] as Excel data.", ex) - except IOException, ex: - operationLog.error("Could not open file [" + fileName + "] as Excel data.", ex) - return None - - - -class ValidationHelper: - """ - Methods for simplifying validation in BaSynthec. - This class ported from Java, thus the camelCase naming. - """ - def __init__(self, metadataMap, errors): - self.metadataMap = metadataMap - self.errors = errors - - def checkIsSpecified(self, property, displayName): - if self.metadataMap.get(property) is None: - self.errors.append(ValidationError.createFileValidationError("A " + displayName - + " must be specified.")) - return False - return True - def validate_data(time_series_data, errors): gene_locus_regex = re.compile("^BSU[0-9]+|^BSU_misc_RNA_[0-9]+|^VMG_[0-9]+_[0-9]+(_c)?") column_header_regex = re.compile("(\+|-)?[0-9]+::(value|mean|median|std|var|error|iqr)") diff --git a/eu_basynthec/dist/etc/shared/shared-classes.py b/eu_basynthec/dist/etc/shared/shared-classes.py new file mode 100644 index 0000000000000000000000000000000000000000..07f7cecb4e3b60380657a2b35d7d4f84b25054d7 --- /dev/null +++ b/eu_basynthec/dist/etc/shared/shared-classes.py @@ -0,0 +1,101 @@ +import os +import re +import sys +import java.io.File +from java.io import IOException +from java.lang import IllegalArgumentException +from ch.systemsx.cisd.openbis.dss.generic.shared.api.v1.validation import ValidationError, ValidationScriptRunner +from ch.systemsx.cisd.openbis.dss.generic.shared.utils import ExcelFileReader +from ch.systemsx.cisd.common.logging import LogFactory, LogCategory + +operationLog = LogFactory.getLogger(LogCategory.OPERATION, ValidationScriptRunner) 
+OPENBIS_METADATA_SHEET_NAME = "openbis-metadata" +OPENBIS_DATA_SHEET_NAME = "openbis-data" + +class TimeSeriesDataExcel: + """ + An abstraction for accessing time series data following the BaSynthec conventions + from an Excel file. This class is ported from Java, thus the camelCase naming. + """ + def __init__(self, file, fileReader): + self.file = file + self.fileReader = fileReader + + def getRawMetadataLines(self): + """Get the raw lines of the metadata sheet.""" + try: + return self.fileReader.readLines(OPENBIS_METADATA_SHEET_NAME); + except IOException, ex: + operationLog.error("Could not read data from [file: " + self.file.getPath() + ", sheet: " + + OPENBIS_METADATA_SHEET_NAME + "]", ex) + return [] + + def getRawDataLines(self): + """Get the raw lines of the data sheet.""" + try: + return self.fileReader.readLines(OPENBIS_DATA_SHEET_NAME) + except IOException, ex: + operationLog.error("Could not read data from [file: " + file.getPath() + ", sheet: " + + OPENBIS_DATA_SHEET_NAME + "]", ex) + return [] + + def getMetadataMap(self): + """ + Return the metadata as a hashmap, with all keys uppercased. + + Assumes the metadata sheet corresponds to the following format: [Property] [Value] [... stuff + that can be ignored], that is the property name is in column 1 and property value is in + column 2, and everything else can be ignored. + """ + metadataMap = {} + metadataLines = self.getRawMetadataLines() + + # Skip the first line, this is just the header + for i in range(1, metadataLines.size()): + line = metadataLines.get(i) + value = line[1]; + if "BLANK" == value: + value = None + metadataMap[line[0].upper()] = value + return metadataMap + +def create_time_series_excel(fileName): + """Factory method for the TimeSeriesData object. 
Returns None if it cannot be created.""" + file = java.io.File(fileName) + try: + workbook = ExcelFileReader.getExcelWorkbook(file) + fileReader = ExcelFileReader(workbook, True) + return TimeSeriesDataExcel(file, fileReader) + except IllegalArgumentException, ex: + operationLog.error("Could not open file [" + fileName + "] as Excel data.", ex) + except IOException, ex: + operationLog.error("Could not open file [" + fileName + "] as Excel data.", ex) + return None + + + +class ValidationHelper: + """ + Methods for simplifying validation in BaSynthec. + This class is ported from Java, thus the camelCase naming. + """ + def __init__(self, metadataMap, errors): + self.metadataMap = metadataMap + self.errors = errors + + def checkIsSpecified(self, property, displayName): + """Verify that a property is specified; if not, add a validation error to the list.""" + if self.metadataMap.get(property) is None: + self.errors.append(ValidationError.createFileValidationError("A " + displayName + + " must be specified.")) + return False + return True + + +strainIdRegex = re.compile("^MGP[0-9]{1,3}") +def isStrainIdValid(strainId): + """Return true if the strain id passes validation (has the form MGP[:digit:]{1,3})""" + match = strainIdRegex.match(strainId) + if match is None: + return False + return match.end() == len(strainId) diff --git a/eu_basynthec/dist/etc/transcriptomics/data-set-validator.py b/eu_basynthec/dist/etc/transcriptomics/data-set-validator.py index 5cdf0840db0c90ff7924b16fc815d952788a3528..b42c2fdb592174caba11535c09fc11aef39273eb 100644 --- a/eu_basynthec/dist/etc/transcriptomics/data-set-validator.py +++ b/eu_basynthec/dist/etc/transcriptomics/data-set-validator.py @@ -1,95 +1,3 @@ -import os -import re -import sys -import java.io.File -from java.io import IOException -from java.lang import IllegalArgumentException -from ch.systemsx.cisd.openbis.dss.generic.shared.api.v1.validation import ValidationError, ValidationScriptRunner -from 
ch.systemsx.cisd.openbis.dss.generic.shared.utils import ExcelFileReader -from ch.systemsx.cisd.common.logging import LogFactory, LogCategory - -operationLog = LogFactory.getLogger(LogCategory.OPERATION, ValidationScriptRunner) -OPENBIS_METADATA_SHEET_NAME = "openbis-metadata" -OPENBIS_DATA_SHEET_NAME = "openbis-data" - -class TimeSeriesDataExcel: - """ - An abstraction for accessing time series data following the BaSynthec conventions - from an Excel file. This class ported from Java, thus the camelCase naming. - """ - def __init__(self, file, fileReader): - self.file = file - self.fileReader = fileReader - - def getRawMetadataLines(self): - """Get the raw lines of the metadata sheet.""" - try: - return self.fileReader.readLines(OPENBIS_METADATA_SHEET_NAME); - except IOException, ex: - operationLog.error("Could not read data from [file: " + self.file.getPath() + ", sheet: " - + OPENBIS_METADATA_SHEET_NAME + "]", ex) - return [] - - def getRawDataLines(self): - """Get the raw lines of the data sheet.""" - try: - return self.fileReader.readLines(OPENBIS_DATA_SHEET_NAME) - except IOException, ex: - operationLog.error("Could not read data from [file: " + file.getPath() + ", sheet: " - + OPENBIS_DATA_SHEET_NAME + "]", ex) - return [] - - def getMetadataMap(self): - """ - Return the metadata has a hashmap, with all keys uppercased. - - Assumes the metadata sheet corresponds to the following format: [Property] [Value] [... stuff - that can be ignored], that is the property name is in column 1 and property value is in - column 2, and everything else can be ignored. - """ - metadataMap = {} - metadataLines = self.getRawMetadataLines() - - # Skip the first line, this is just the header - for i in range(1, metadataLines.size()): - line = metadataLines.get(i) - value = line[1]; - if "BLANK" == value: - value = None - metadataMap[line[0].upper()] = value - return metadataMap - -def create_time_series_excel(fileName): - """Factory method for the TimeSeriesData object. 
Returns None if it cannot be created""" - file = java.io.File(fileName) - try: - workbook = ExcelFileReader.getExcelWorkbook(file) - fileReader = ExcelFileReader(workbook, True) - return TimeSeriesDataExcel(file, fileReader) - except IllegalArgumentException, ex: - operationLog.error("Could not open file [" + fileName + "] as Excel data.", ex) - except IOException, ex: - operationLog.error("Could not open file [" + fileName + "] as Excel data.", ex) - return None - - - -class ValidationHelper: - """ - Methods for simplifying validation in BaSynthec. - This class ported from Java, thus the camelCase naming. - """ - def __init__(self, metadataMap, errors): - self.metadataMap = metadataMap - self.errors = errors - - def checkIsSpecified(self, property, displayName): - if self.metadataMap.get(property) is None: - self.errors.append(ValidationError.createFileValidationError("A " + displayName - + " must be specified.")) - return False - return True - def validate_data(time_series_data, errors): gene_locus_regex = re.compile("^BSU[0-9]+|^BSU_misc_RNA_[0-9]+|^VMG_[0-9]+_[0-9]+(_c)?") dataLines = time_series_data.getRawDataLines() diff --git a/eu_basynthec/sourceTest/examples/OD600-BadData.xlsx b/eu_basynthec/sourceTest/examples/OD600-BadData.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..77472c04d3163f7920b325721eaa0ad7c375796a Binary files /dev/null and b/eu_basynthec/sourceTest/examples/OD600-BadData.xlsx differ diff --git a/eu_basynthec/sourceTest/examples/OD600-Example.xlsx b/eu_basynthec/sourceTest/examples/OD600-Example.xlsx index d6e35b4b893639449069f18023af3fb499c40204..4f83f9ce28149ce85353f1e0456bfaddba7aa093 100644 Binary files a/eu_basynthec/sourceTest/examples/OD600-Example.xlsx and b/eu_basynthec/sourceTest/examples/OD600-Example.xlsx differ diff --git a/eu_basynthec/sourceTest/examples/OD600-Template.xlsx b/eu_basynthec/sourceTest/examples/OD600-Template.xlsx index 
08dbb1ba136006d62f943514eda36cfbc8d33a88..be43feb2f882179020b274e7f3ca47119135cad8 100644 Binary files a/eu_basynthec/sourceTest/examples/OD600-Template.xlsx and b/eu_basynthec/sourceTest/examples/OD600-Template.xlsx differ diff --git a/eu_basynthec/sourceTest/java/eu/basynthec/cisd/dss/growthprofiles/OD600DataSetRegistratorTest.java b/eu_basynthec/sourceTest/java/eu/basynthec/cisd/dss/growthprofiles/OD600DataSetRegistratorTest.java new file mode 100644 index 0000000000000000000000000000000000000000..eaa8a49f4226f153e3236ee4e1ef825b239f189d --- /dev/null +++ b/eu_basynthec/sourceTest/java/eu/basynthec/cisd/dss/growthprofiles/OD600DataSetRegistratorTest.java @@ -0,0 +1,132 @@ +/* + * Copyright 2011 ETH Zuerich, CISD + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package eu.basynthec.cisd.dss.growthprofiles; + +import static ch.systemsx.cisd.common.Constants.IS_FINISHED_PREFIX; + +import java.io.File; +import java.io.IOException; +import java.util.HashMap; +import java.util.List; +import java.util.Properties; + +import org.apache.commons.io.FileUtils; +import org.jmock.Expectations; +import org.testng.annotations.Test; + +import ch.systemsx.cisd.common.filesystem.FileUtilities; +import ch.systemsx.cisd.common.test.RecordingMatcher; +import ch.systemsx.cisd.etlserver.registrator.AbstractJythonDataSetHandlerTest; +import ch.systemsx.cisd.openbis.generic.shared.basic.dto.DataSetType; +import ch.systemsx.cisd.openbis.generic.shared.basic.dto.Experiment; +import ch.systemsx.cisd.openbis.generic.shared.basic.dto.builders.ExperimentBuilder; +import ch.systemsx.cisd.openbis.generic.shared.dto.AtomicEntityOperationResult; +import ch.systemsx.cisd.openbis.generic.shared.dto.NewExternalData; +import ch.systemsx.cisd.openbis.generic.shared.dto.NewProperty; +import ch.systemsx.cisd.openbis.generic.shared.dto.identifier.ExperimentIdentifierFactory; + +/** + * @author Chandrasekhar Ramakrishnan + */ +public class OD600DataSetRegistratorTest extends AbstractJythonDataSetHandlerTest +{ + private static final String STRAIN_NAMES_PROP = "STRAIN_NAMES"; + + private static final String DATA_SET_CODE = "data-set-code"; + + private static final DataSetType DATA_SET_TYPE = new DataSetType("OD600"); + + private static final String EXPERIMENT_IDENTIFIER = "/TEST/TEST/TEST"; + + @Test + public void testSimpleTransaction() throws IOException + { + setUpHomeDataBaseExpectations(); + Properties properties = + createThreadPropertiesRelativeToScriptsFolder("data-set-handler.py", + "dist/etc/shared/shared-classes.py,dist/etc/growth-profiles/data-set-validator.py"); + createHandler(properties, false, true); + createData(); + + ExperimentBuilder builder = new ExperimentBuilder().identifier(EXPERIMENT_IDENTIFIER); + final Experiment experiment = 
builder.getExperiment(); + final RecordingMatcher<ch.systemsx.cisd.openbis.generic.shared.dto.AtomicEntityOperationDetails> atomicatOperationDetails = + new RecordingMatcher<ch.systemsx.cisd.openbis.generic.shared.dto.AtomicEntityOperationDetails>(); + context.checking(new Expectations() + { + { + one(openBisService).createDataSetCode(); + will(returnValue(DATA_SET_CODE)); + atLeast(1).of(openBisService).tryToGetExperiment( + new ExperimentIdentifierFactory(experiment.getIdentifier()) + .createIdentifier()); + will(returnValue(experiment)); + + one(dataSetValidator).assertValidDataSet(DATA_SET_TYPE, + new File(new File(stagingDirectory, DATA_SET_CODE), "data")); + one(openBisService).performEntityOperations(with(atomicatOperationDetails)); + will(returnValue(new AtomicEntityOperationResult())); + } + }); + + handler.handle(markerFile); + + assertEquals(1, atomicatOperationDetails.recordedObject().getDataSetRegistrations().size()); + + NewExternalData dataSet = + atomicatOperationDetails.recordedObject().getDataSetRegistrations().get(0); + + assertEquals(DATA_SET_CODE, dataSet.getCode()); + assertEquals(DATA_SET_TYPE, dataSet.getDataSetType()); + + HashMap<String, NewProperty> propertyMap = + getDataSetPropertiesMap(dataSet.getDataSetProperties()); + NewProperty strainProperty = propertyMap.get(STRAIN_NAMES_PROP); + + assertNotNull(strainProperty); + assert null != strainProperty; + assertEquals("MGP1,MGP100,MGP20,MGP999", strainProperty.getValue()); + context.assertIsSatisfied(); + } + + private void createData() throws IOException + { + File dataFile = new File("sourceTest/examples/OD600-Example.xlsx"); + FileUtils.copyFileToDirectory(dataFile, workingDirectory); + incomingDataSetFile = new File(workingDirectory, dataFile.getName()); + + markerFile = new File(workingDirectory, IS_FINISHED_PREFIX + dataFile.getName()); + FileUtilities.writeToFile(markerFile, ""); + } + + protected HashMap<String, NewProperty> getDataSetPropertiesMap( + List<NewProperty> 
dataSetProperties) + { + HashMap<String, NewProperty> propertyMap = new HashMap<String, NewProperty>(); + for (NewProperty prop : dataSetProperties) + { + propertyMap.put(prop.getPropertyCode(), prop); + } + return propertyMap; + } + + @Override + protected String getRegistrationScriptsFolderPath() + { + return "dist/etc/growth-profiles/"; + } +} diff --git a/eu_basynthec/sourceTest/java/eu/basynthec/cisd/dss/growthprofiles/OD600ValidatorTest.java b/eu_basynthec/sourceTest/java/eu/basynthec/cisd/dss/growthprofiles/OD600ValidatorTest.java index de98a23d1928e942f60f0c590258888a89c99185..3d4059fb0361ead8a98d8d60b0b82da41e2b7734 100644 --- a/eu_basynthec/sourceTest/java/eu/basynthec/cisd/dss/growthprofiles/OD600ValidatorTest.java +++ b/eu_basynthec/sourceTest/java/eu/basynthec/cisd/dss/growthprofiles/OD600ValidatorTest.java @@ -31,7 +31,7 @@ import ch.systemsx.cisd.openbis.dss.generic.shared.api.v1.validation.ValidationS public class OD600ValidatorTest extends AssertJUnit { private static final String[] VALIDATION_SCRIPT_PATH = new String[] - { "dist/etc/growth-profiles/data-set-validator.py" }; + { "dist/etc/shared/shared-classes.py", "dist/etc/growth-profiles/data-set-validator.py" }; @Test public void testGoodData() @@ -43,6 +43,20 @@ public class OD600ValidatorTest extends AssertJUnit assertTrue("The example should have no errors", errors.isEmpty()); } + @Test + public void testBadData() + { + ValidationScriptRunner scriptRunner = + ValidationScriptRunner.createValidatorFromScriptPaths(VALIDATION_SCRIPT_PATH); + List<ValidationError> errors = + scriptRunner.validate(new File("sourceTest/examples/OD600-BadData.xlsx")); + assertEquals("The bad data should have two errors", 2, errors.size()); + assertEquals("Line 2, column 1 must be MGP[0-999] (instead of OD600).", errors.get(0) + .getErrorMessage()); + assertEquals("Line 3, column 1 must be MGP[0-999] (instead of MGP1000).", errors.get(1) + .getErrorMessage()); + } + @Test public void testTemplate() { @@ -50,11 +64,7 
@@ public class OD600ValidatorTest extends AssertJUnit ValidationScriptRunner.createValidatorFromScriptPaths(VALIDATION_SCRIPT_PATH); List<ValidationError> errors = scriptRunner.validate(new File("sourceTest/examples/OD600-Template.xlsx")); - if (errors.size() > 0) - { - System.out.println(errors); - } - assertEquals("The template should have five errors", 5, errors.size()); + assertEquals("The template should have five errors", 4, errors.size()); } } diff --git a/eu_basynthec/sourceTest/java/eu/basynthec/cisd/dss/metabolomics/MetabolomicsValidatorTest.java b/eu_basynthec/sourceTest/java/eu/basynthec/cisd/dss/metabolomics/MetabolomicsValidatorTest.java index 6d2fcf11532b3d7fef72af490035311ad4223f0d..8dc2ba733bd7ed9eb27c41b82ed5e74d371aa452 100644 --- a/eu_basynthec/sourceTest/java/eu/basynthec/cisd/dss/metabolomics/MetabolomicsValidatorTest.java +++ b/eu_basynthec/sourceTest/java/eu/basynthec/cisd/dss/metabolomics/MetabolomicsValidatorTest.java @@ -31,7 +31,7 @@ import ch.systemsx.cisd.openbis.dss.generic.shared.api.v1.validation.ValidationS public class MetabolomicsValidatorTest extends AssertJUnit { private static final String[] VALIDATION_SCRIPT_PATH = new String[] - { "dist/etc/metabolomics/data-set-validator.py" }; + { "dist/etc/shared/shared-classes.py", "dist/etc/metabolomics/data-set-validator.py" }; @Test public void testGoodData() diff --git a/eu_basynthec/sourceTest/java/eu/basynthec/cisd/dss/proteomics/ProteomicsValidatorTest.java b/eu_basynthec/sourceTest/java/eu/basynthec/cisd/dss/proteomics/ProteomicsValidatorTest.java index d75f588fc76cf87c67781bf237a98c32b1e6f40e..2a41d4361169c7fbad6ac98a6e082dc29c3ab685 100644 --- a/eu_basynthec/sourceTest/java/eu/basynthec/cisd/dss/proteomics/ProteomicsValidatorTest.java +++ b/eu_basynthec/sourceTest/java/eu/basynthec/cisd/dss/proteomics/ProteomicsValidatorTest.java @@ -31,7 +31,7 @@ import ch.systemsx.cisd.openbis.dss.generic.shared.api.v1.validation.ValidationS public class ProteomicsValidatorTest extends 
AssertJUnit { private static final String[] VALIDATION_SCRIPT_PATH = new String[] - { "dist/etc/proteomics/data-set-validator.py" }; + { "dist/etc/shared/shared-classes.py", "dist/etc/proteomics/data-set-validator.py" }; @Test public void testGoodData() diff --git a/eu_basynthec/sourceTest/java/eu/basynthec/cisd/dss/transcriptomics/TranscriptomicsValidatorTest.java b/eu_basynthec/sourceTest/java/eu/basynthec/cisd/dss/transcriptomics/TranscriptomicsValidatorTest.java index e6c76a7c28328c4b61e3c88839cc1913dd64e43d..1b009c1724585e0dced241ca08be1e4af6d6c3fa 100644 --- a/eu_basynthec/sourceTest/java/eu/basynthec/cisd/dss/transcriptomics/TranscriptomicsValidatorTest.java +++ b/eu_basynthec/sourceTest/java/eu/basynthec/cisd/dss/transcriptomics/TranscriptomicsValidatorTest.java @@ -31,7 +31,7 @@ import ch.systemsx.cisd.openbis.dss.generic.shared.api.v1.validation.ValidationS public class TranscriptomicsValidatorTest extends AssertJUnit { private static final String[] VALIDATION_SCRIPT_PATH = new String[] - { "dist/etc/transcriptomics/data-set-validator.py" }; + { "dist/etc/shared/shared-classes.py", "dist/etc/transcriptomics/data-set-validator.py" }; @Test public void testGoodData()