From ec62495a011d57acdb4b1ceb5d04dc5ee143d1d5 Mon Sep 17 00:00:00 2001 From: cramakri <cramakri> Date: Wed, 23 Jun 2010 09:58:35 +0000 Subject: [PATCH] LMS-1584 Implemented migrator for CSV and Genedata feature vectors. SVN: 16684 --- screening/etc/service.properties | 15 +- .../AbstractFeatureVectorMigrator.java | 227 ++++++++++++++++++ .../CsvFeatureVectorMigrator.java | 166 +++++++++++++ .../FeatureVectorStorageProcessor.java | 90 ------- ...reVectorStorageProcessorConfiguration.java | 110 +++++++++ .../etl/genedata/FeatureStorageProcessor.java | 13 +- .../GenedataFeatureVectorMigrator.java | 137 +++++++++++ ...enedataFormatToCanonicalFeatureVector.java | 3 +- ...ataFormatToCanonicalFeatureVectorTest.java | 1 - 9 files changed, 665 insertions(+), 97 deletions(-) create mode 100644 screening/source/java/ch/systemsx/cisd/openbis/dss/etl/featurevector/AbstractFeatureVectorMigrator.java create mode 100644 screening/source/java/ch/systemsx/cisd/openbis/dss/etl/featurevector/CsvFeatureVectorMigrator.java create mode 100644 screening/source/java/ch/systemsx/cisd/openbis/dss/etl/featurevector/FeatureVectorStorageProcessorConfiguration.java create mode 100644 screening/source/java/ch/systemsx/cisd/openbis/dss/etl/genedata/GenedataFeatureVectorMigrator.java rename screening/source/java/ch/systemsx/cisd/openbis/dss/etl/{featurevector => genedata}/GenedataFormatToCanonicalFeatureVector.java (98%) diff --git a/screening/etc/service.properties b/screening/etc/service.properties index bf6905f8aee..af1f3e1b198 100644 --- a/screening/etc/service.properties +++ b/screening/etc/service.properties @@ -141,7 +141,7 @@ screening-dss-api-exporter-servlet.path = /rmi-datastore-server-screening-api-v1 # --------------------------------------------------------------------------- -maintenance-plugins=hierarchical-storage-updater, data-set-clean-up, migrator +maintenance-plugins=hierarchical-storage-updater, data-set-clean-up, migrator, csv-feature-migrator # the plugin which is run 
periodically to create a mirror structure of the store with the same files # but with user-readable structure of directories @@ -167,6 +167,19 @@ migrator.bds-image-db.channel-names = dapi, gfp migrator.bds-original-relocator.class = ch.systemsx.cisd.openbis.dss.etl.bdsmigration.BDSOriginalDataRelocatorMigrator migrator.bds-remover.class = ch.systemsx.cisd.openbis.dss.etl.bdsmigration.BDSDataRemoverMigrator +csv-feature-migrator.class = ch.systemsx.cisd.etlserver.plugins.ChainedDataSetMigrationTask +csv-feature-migrator.execute-only-once = true +csv-feature-migrator.storeRoot = ${storeroot-dir} +csv-feature-migrator.migrators = feature-image-db +csv-feature-migrator.feature-image-db.class = ch.systemsx.cisd.openbis.dss.etl.featurevector.CsvFeatureVectorMigrator +csv-feature-migrator.feature-image-db.data-source = imaging-db +# semi-colon (;) by default +#csv-feature-migrator.feature-image-db.separator = , +#csv-feature-migrator.feature-image-db.ignore-comments = true +#csv-feature-migrator.feature-image-db.well-name-row = row +#csv-feature-migrator.feature-image-db.well-name-col = col +#csv-feature-migrator.feature-image-db.well-name-col-is-alphanum = true + # --------------------------------------------------------------------------- # DROPBOXES CONFIGURATION # --------------------------------------------------------------------------- diff --git a/screening/source/java/ch/systemsx/cisd/openbis/dss/etl/featurevector/AbstractFeatureVectorMigrator.java b/screening/source/java/ch/systemsx/cisd/openbis/dss/etl/featurevector/AbstractFeatureVectorMigrator.java new file mode 100644 index 00000000000..ede05697cd1 --- /dev/null +++ b/screening/source/java/ch/systemsx/cisd/openbis/dss/etl/featurevector/AbstractFeatureVectorMigrator.java @@ -0,0 +1,227 @@ +/* + * Copyright 2010 ETH Zuerich, CISD + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package ch.systemsx.cisd.openbis.dss.etl.featurevector; + +import java.io.File; +import java.util.HashMap; +import java.util.List; +import java.util.Properties; + +import javax.sql.DataSource; + +import net.lemnik.eodsql.QueryTool; + +import ch.systemsx.cisd.etlserver.plugins.IMigrator; +import ch.systemsx.cisd.openbis.dss.etl.ScreeningContainerDatasetInfo; +import ch.systemsx.cisd.openbis.dss.etl.dataaccess.IImagingUploadDAO; +import ch.systemsx.cisd.openbis.dss.generic.shared.IEncapsulatedOpenBISService; +import ch.systemsx.cisd.openbis.dss.generic.shared.ServiceProvider; +import ch.systemsx.cisd.openbis.generic.shared.basic.dto.Experiment; +import ch.systemsx.cisd.openbis.generic.shared.basic.dto.Sample; +import ch.systemsx.cisd.openbis.generic.shared.dto.SimpleDataSetInformationDTO; +import ch.systemsx.cisd.openbis.generic.shared.dto.identifier.SampleIdentifier; +import ch.systemsx.cisd.openbis.generic.shared.dto.identifier.SpaceIdentifier; +import ch.systemsx.cisd.openbis.plugin.screening.shared.basic.dto.ScreeningConstants; + +/** + * Imports individual data sets into the imaging db. 
+ * + * @author Chandrasekhar Ramakrishnan + */ +public abstract class AbstractFeatureVectorMigrator implements IMigrator +{ + protected final IImagingUploadDAO dao; + + protected final IEncapsulatedOpenBISService openBisService; + + protected final List<SimpleDataSetInformationDTO> knownDataSets; + + protected final HashMap<String, SimpleDataSetInformationDTO> knownDataSetsByCode; + + public AbstractFeatureVectorMigrator(Properties properties) + { + DataSource dataSource = ServiceProvider.getDataSourceProvider().getDataSource(properties); + dao = QueryTool.getQuery(dataSource, IImagingUploadDAO.class); + openBisService = ServiceProvider.getOpenBISService(); + knownDataSets = openBisService.listDataSets(); + knownDataSetsByCode = + new HashMap<String, SimpleDataSetInformationDTO>(knownDataSets.size()); + + initializeDataSetsByCode(); + } + + /** + * Create a map of associations from code to data set info. Assumes knownDataSets has been + * initialized. + */ + private void initializeDataSetsByCode() + { + for (SimpleDataSetInformationDTO dataSetInfo : knownDataSets) + { + knownDataSetsByCode.put(dataSetInfo.getDataSetCode(), dataSetInfo); + } + } + + public String getDescription() + { + return "uploading feature vectors to the imaging database"; + } + + public boolean migrate(File dataset) + { + AbstractMigrationDecision decision = createMigrationDecision(dataset); + decision.process(); + if (false == decision.shouldMigrate) + { + // Vacuously true + return true; + } + + AbstractImageDbImporter importer; + importer = + createImporter(createScreeningDatasetInfo(decision.getDataSetInfo()), + decision.fileToMigrate); + + importer.doImport(); + + return importer.isSuccessful; + } + + protected abstract AbstractMigrationDecision createMigrationDecision(File dataset); + + protected abstract AbstractImageDbImporter createImporter( + ScreeningContainerDatasetInfo dataSetInfo, File fileToMigrate); + + protected ScreeningContainerDatasetInfo createScreeningDatasetInfo( +
SimpleDataSetInformationDTO dataSetInfo) + { + SampleIdentifier sampleId = + new SampleIdentifier(new SpaceIdentifier(dataSetInfo.getDatabaseInstanceCode(), + dataSetInfo.getGroupCode()), dataSetInfo.getSampleCode()); + Sample sample = openBisService.tryGetSampleWithExperiment(sampleId); + assert sample != null : "no sample connected to a dataset"; + + Experiment experiment = sample.getExperiment(); + ScreeningContainerDatasetInfo info = new ScreeningContainerDatasetInfo(); + info.setExperimentPermId(experiment.getPermId()); + info.setContainerPermId(sample.getPermId()); + info.setDatasetPermId(dataSetInfo.getDataSetCode()); + + return info; + } + + /** + * Helper class for figuring out what to do with files + * + * @author Chandrasekhar Ramakrishnan + */ + protected abstract static class AbstractMigrationDecision + { + protected final File dataset; + + private final HashMap<String, SimpleDataSetInformationDTO> knownDataSetsByCode; + + private SimpleDataSetInformationDTO dataSetInfo; + + private boolean shouldMigrate = false; + + private File fileToMigrate = null; + + public AbstractMigrationDecision(File dataset, + HashMap<String, SimpleDataSetInformationDTO> knownDataSetsByCode) + { + this.dataset = dataset; + this.knownDataSetsByCode = knownDataSetsByCode; + } + + protected void setDataSetInfo(SimpleDataSetInformationDTO dataSetInfo) + { + this.dataSetInfo = dataSetInfo; + } + + public SimpleDataSetInformationDTO getDataSetInfo() + { + return dataSetInfo; + } + + // Figure out what to do with this file + public void process() + { + setDataSetInfo(tryDataSetInformation()); + if (null == getDataSetInfo()) + { + shouldMigrate = false; + return; + } + + // Only import this data set if it is of an analysis type + if (false == ScreeningConstants.IMAGE_ANALYSIS_DATASET_TYPE.equals(getDataSetInfo() + .getDataSetType())) + { + shouldMigrate = false; + return; + } + + // Figure out which file we need to migrate + fileToMigrate = tryFileToMigrate(); + + if (null == 
fileToMigrate) + { + shouldMigrate = false; + return; + } + + shouldMigrate = true; + return; + } + + protected abstract File tryFileToMigrate(); + + private SimpleDataSetInformationDTO tryDataSetInformation() + { + String dataSetCode = dataset.getName(); + SimpleDataSetInformationDTO dsInfo = knownDataSetsByCode.get(dataSetCode); + return dsInfo; + } + } + + /** + * Helper class for importing data into the image db. + * + * @author Chandrasekhar Ramakrishnan + */ + protected abstract static class AbstractImageDbImporter + { + protected final IImagingUploadDAO dao; + + protected final ScreeningContainerDatasetInfo screeningDataSetInfo; + + protected final File fileToMigrate; + + protected boolean isSuccessful = false; + + protected AbstractImageDbImporter(IImagingUploadDAO dao, + ScreeningContainerDatasetInfo screeningDataSetInfo, File fileToMigrate) + { + this.dao = dao; + this.screeningDataSetInfo = screeningDataSetInfo; + this.fileToMigrate = fileToMigrate; + } + + public abstract void doImport(); + } +} diff --git a/screening/source/java/ch/systemsx/cisd/openbis/dss/etl/featurevector/CsvFeatureVectorMigrator.java b/screening/source/java/ch/systemsx/cisd/openbis/dss/etl/featurevector/CsvFeatureVectorMigrator.java new file mode 100644 index 00000000000..929f0f96b74 --- /dev/null +++ b/screening/source/java/ch/systemsx/cisd/openbis/dss/etl/featurevector/CsvFeatureVectorMigrator.java @@ -0,0 +1,166 @@ +/* + * Copyright 2010 ETH Zuerich, CISD + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package ch.systemsx.cisd.openbis.dss.etl.featurevector; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Properties; + +import ch.systemsx.cisd.base.exceptions.IOExceptionUnchecked; +import ch.systemsx.cisd.etlserver.DefaultStorageProcessor; +import ch.systemsx.cisd.openbis.dss.etl.ScreeningContainerDatasetInfo; +import ch.systemsx.cisd.openbis.dss.etl.dataaccess.IImagingUploadDAO; +import ch.systemsx.cisd.openbis.dss.etl.featurevector.CSVToCanonicalFeatureVector.CSVToCanonicalFeatureVectorConfiguration; +import ch.systemsx.cisd.openbis.dss.generic.server.plugins.tasks.DatasetFileLines; +import ch.systemsx.cisd.openbis.generic.shared.dto.SimpleDataSetInformationDTO; +import ch.systemsx.cisd.utils.CsvFileReaderHelper; + +/** + * @author Chandrasekhar Ramakrishnan + */ +public class CsvFeatureVectorMigrator extends AbstractFeatureVectorMigrator +{ + protected final FeatureVectorStorageProcessorConfiguration configuration; + + protected final CSVToCanonicalFeatureVectorConfiguration convertorConfig; + + /** + * @param properties + */ + public CsvFeatureVectorMigrator(Properties properties) + { + super(properties); + + this.configuration = new FeatureVectorStorageProcessorConfiguration(properties); + convertorConfig = + new CSVToCanonicalFeatureVectorConfiguration(configuration.getWellRow(), + configuration.getWellColumn(), configuration.isWellColAlphanumeric()); + } + + @Override + protected AbstractImageDbImporter createImporter(ScreeningContainerDatasetInfo dataSetInfo, + File fileToMigrate) + { + AbstractImageDbImporter importer; + + importer = new ImporterCsv(dao, dataSetInfo, fileToMigrate, configuration, convertorConfig); + + return importer; + } + + @Override + protected AbstractMigrationDecision createMigrationDecision(File dataset) + { + AbstractMigrationDecision 
decision = new MigrationDecision(dataset, knownDataSetsByCode); + return decision; + } + + /** + * Helper class for deciding if a file needs to be migrated. + * + * @author Chandrasekhar Ramakrishnan + */ + private static class MigrationDecision extends AbstractMigrationDecision + { + + /** + * @param dataset + * @param knownDataSetsByCode + */ + public MigrationDecision(File dataset, + HashMap<String, SimpleDataSetInformationDTO> knownDataSetsByCode) + { + super(dataset, knownDataSetsByCode); + } + + @Override + protected File tryFileToMigrate() + { + File originalDataset = DefaultStorageProcessor.getOriginalDirectory(dataset); + File[] files = originalDataset.listFiles(); + + if (files.length == 1) + { + File file = files[0]; + if (file.isDirectory()) + { + return null; + } + return file; + } + return null; + } + + } + + /** + * Helper class for importing CSV feature vector files + * + * @author Chandrasekhar Ramakrishnan + */ + private static class ImporterCsv extends AbstractImageDbImporter + { + private final FeatureVectorStorageProcessorConfiguration configuration; + + private final CSVToCanonicalFeatureVectorConfiguration convertorConfig; + + protected ImporterCsv(IImagingUploadDAO dao, + ScreeningContainerDatasetInfo screeningDataSetInfo, File fileToMigrate, + FeatureVectorStorageProcessorConfiguration configuration, + CSVToCanonicalFeatureVectorConfiguration convertorConfig) + { + super(dao, screeningDataSetInfo, fileToMigrate); + this.configuration = configuration; + this.convertorConfig = convertorConfig; + } + + @Override + public void doImport() + { + DatasetFileLines fileLines; + try + { + fileLines = getDatasetFileLines(fileToMigrate); + CSVToCanonicalFeatureVector convertor = + new CSVToCanonicalFeatureVector(fileLines, convertorConfig); + ArrayList<CanonicalFeatureVector> fvecs = convertor.convert(); + + FeatureVectorUploader uploader = + new FeatureVectorUploader(dao, screeningDataSetInfo); + uploader.uploadFeatureVectors(fvecs); + dao.commit(); + 
isSuccessful = true; + } catch (IOException ex) + { + throw new IOExceptionUnchecked(ex); + } + + } + + /** + * Return the tabular data as a DatasetFileLines. + */ + private DatasetFileLines getDatasetFileLines(File file) throws IOException + { + return CsvFileReaderHelper.getDatasetFileLines(file, configuration); + } + + } + +} diff --git a/screening/source/java/ch/systemsx/cisd/openbis/dss/etl/featurevector/FeatureVectorStorageProcessor.java b/screening/source/java/ch/systemsx/cisd/openbis/dss/etl/featurevector/FeatureVectorStorageProcessor.java index 625c764c768..319e82720a3 100644 --- a/screening/source/java/ch/systemsx/cisd/openbis/dss/etl/featurevector/FeatureVectorStorageProcessor.java +++ b/screening/source/java/ch/systemsx/cisd/openbis/dss/etl/featurevector/FeatureVectorStorageProcessor.java @@ -27,7 +27,6 @@ import net.lemnik.eodsql.QueryTool; import ch.systemsx.cisd.base.exceptions.IOExceptionUnchecked; import ch.systemsx.cisd.common.mail.IMailClient; -import ch.systemsx.cisd.common.utilities.PropertyUtils; import ch.systemsx.cisd.etlserver.AbstractDelegatingStorageProcessor; import ch.systemsx.cisd.etlserver.ITypeExtractor; import ch.systemsx.cisd.openbis.dss.etl.ScreeningContainerDatasetInfo; @@ -37,7 +36,6 @@ import ch.systemsx.cisd.openbis.dss.generic.server.plugins.tasks.DatasetFileLine import ch.systemsx.cisd.openbis.dss.generic.shared.ServiceProvider; import ch.systemsx.cisd.openbis.dss.generic.shared.dto.DataSetInformation; import ch.systemsx.cisd.utils.CsvFileReaderHelper; -import ch.systemsx.cisd.utils.CsvFileReaderHelper.ICsvFileReaderConfiguration; /** * Extract features from the file and store them in the database. 
@@ -46,94 +44,6 @@ import ch.systemsx.cisd.utils.CsvFileReaderHelper.ICsvFileReaderConfiguration; */ public class FeatureVectorStorageProcessor extends AbstractDelegatingStorageProcessor { - private static class FeatureVectorStorageProcessorConfiguration implements - ICsvFileReaderConfiguration - { - private static final String SEPARATOR_PROPERTY_KEY = "separator"; - - private static final String IGNORE_COMMENTS_PROPERTY_KEY = "ignore-comments"; - - private static final String WELL_NAME_ROW_PROPERTY_KEY = "well-name-row"; - - private static final String WELL_NAME_COL_PROPERTY_KEY = "well-name-col"; - - private static final String WELL_NAME_COL_ALPHA_NUM_PROPERTY_KEY = - "well-name-col-is-alphanum"; - - private static final char DEFAULT_DELIMITER = ';'; - - private static final String DEFAULT_WELL_ROW = "WellName"; - - private static final String DEFAULT_WELL_COL = "WellName"; - - private static final boolean DEFAULT_WELL_ROW_ALPHANUM = true; - - private final char columnDelimiter; - - private final boolean ignoreComments; - - private final char comment; - - private final String wellRow; - - private final String wellColumn; - - private final boolean isWellColAlphanumeric; - - private FeatureVectorStorageProcessorConfiguration(Properties properties) - { - comment = '#'; - - this.columnDelimiter = - PropertyUtils.getChar(properties, SEPARATOR_PROPERTY_KEY, DEFAULT_DELIMITER); - this.ignoreComments = - PropertyUtils.getBoolean(properties, IGNORE_COMMENTS_PROPERTY_KEY, true); - - this.wellRow = properties.getProperty(WELL_NAME_ROW_PROPERTY_KEY, DEFAULT_WELL_ROW); - - this.wellColumn = properties.getProperty(WELL_NAME_COL_PROPERTY_KEY, DEFAULT_WELL_COL); - - this.isWellColAlphanumeric = - PropertyUtils.getBoolean(properties, WELL_NAME_COL_ALPHA_NUM_PROPERTY_KEY, - DEFAULT_WELL_ROW_ALPHANUM); - } - - public char getColumnDelimiter() - { - return columnDelimiter; - } - - public char getCommentDelimiter() - { - return comment; - } - - public boolean isIgnoreComments() - { - 
return ignoreComments; - } - - public boolean isSkipEmptyRecords() - { - return true; - } - - public String getWellRow() - { - return wellRow; - } - - public String getWellColumn() - { - return wellColumn; - } - - public boolean isWellColAlphanumeric() - { - return isWellColAlphanumeric; - } - } - private static final String ORIGINAL_DIR = "original"; private final FeatureVectorStorageProcessorConfiguration configuration; diff --git a/screening/source/java/ch/systemsx/cisd/openbis/dss/etl/featurevector/FeatureVectorStorageProcessorConfiguration.java b/screening/source/java/ch/systemsx/cisd/openbis/dss/etl/featurevector/FeatureVectorStorageProcessorConfiguration.java new file mode 100644 index 00000000000..a9b7b2efe89 --- /dev/null +++ b/screening/source/java/ch/systemsx/cisd/openbis/dss/etl/featurevector/FeatureVectorStorageProcessorConfiguration.java @@ -0,0 +1,110 @@ +/* + * Copyright 2010 ETH Zuerich, CISD + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package ch.systemsx.cisd.openbis.dss.etl.featurevector; + +import java.util.Properties; + +import ch.systemsx.cisd.common.utilities.PropertyUtils; +import ch.systemsx.cisd.utils.CsvFileReaderHelper.ICsvFileReaderConfiguration; + +class FeatureVectorStorageProcessorConfiguration implements + ICsvFileReaderConfiguration +{ + private static final String SEPARATOR_PROPERTY_KEY = "separator"; + + private static final String IGNORE_COMMENTS_PROPERTY_KEY = "ignore-comments"; + + private static final String WELL_NAME_ROW_PROPERTY_KEY = "well-name-row"; + + private static final String WELL_NAME_COL_PROPERTY_KEY = "well-name-col"; + + private static final String WELL_NAME_COL_ALPHA_NUM_PROPERTY_KEY = + "well-name-col-is-alphanum"; + + private static final char DEFAULT_DELIMITER = ';'; + + private static final String DEFAULT_WELL_ROW = "WellName"; + + private static final String DEFAULT_WELL_COL = "WellName"; + + private static final boolean DEFAULT_WELL_ROW_ALPHANUM = true; + + private final char columnDelimiter; + + private final boolean ignoreComments; + + private final char comment; + + private final String wellRow; + + private final String wellColumn; + + private final boolean isWellColAlphanumeric; + + FeatureVectorStorageProcessorConfiguration(Properties properties) + { + comment = '#'; + + this.columnDelimiter = + PropertyUtils.getChar(properties, SEPARATOR_PROPERTY_KEY, DEFAULT_DELIMITER); + this.ignoreComments = + PropertyUtils.getBoolean(properties, IGNORE_COMMENTS_PROPERTY_KEY, true); + + this.wellRow = properties.getProperty(WELL_NAME_ROW_PROPERTY_KEY, DEFAULT_WELL_ROW); + + this.wellColumn = properties.getProperty(WELL_NAME_COL_PROPERTY_KEY, DEFAULT_WELL_COL); + + this.isWellColAlphanumeric = + PropertyUtils.getBoolean(properties, WELL_NAME_COL_ALPHA_NUM_PROPERTY_KEY, + DEFAULT_WELL_ROW_ALPHANUM); + } + + public char getColumnDelimiter() + { + return columnDelimiter; + } + + public char getCommentDelimiter() + { + return comment; + } + + public boolean 
isIgnoreComments() + { + return ignoreComments; + } + + public boolean isSkipEmptyRecords() + { + return true; + } + + public String getWellRow() + { + return wellRow; + } + + public String getWellColumn() + { + return wellColumn; + } + + public boolean isWellColAlphanumeric() + { + return isWellColAlphanumeric; + } +} \ No newline at end of file diff --git a/screening/source/java/ch/systemsx/cisd/openbis/dss/etl/genedata/FeatureStorageProcessor.java b/screening/source/java/ch/systemsx/cisd/openbis/dss/etl/genedata/FeatureStorageProcessor.java index cefaeba237a..81cda73a163 100644 --- a/screening/source/java/ch/systemsx/cisd/openbis/dss/etl/genedata/FeatureStorageProcessor.java +++ b/screening/source/java/ch/systemsx/cisd/openbis/dss/etl/genedata/FeatureStorageProcessor.java @@ -40,7 +40,6 @@ import ch.systemsx.cisd.openbis.dss.etl.ScreeningContainerDatasetInfo; import ch.systemsx.cisd.openbis.dss.etl.dataaccess.IImagingUploadDAO; import ch.systemsx.cisd.openbis.dss.etl.featurevector.CanonicalFeatureVector; import ch.systemsx.cisd.openbis.dss.etl.featurevector.FeatureVectorUploader; -import ch.systemsx.cisd.openbis.dss.etl.featurevector.GenedataFormatToCanonicalFeatureVector; import ch.systemsx.cisd.openbis.dss.generic.shared.ServiceProvider; import ch.systemsx.cisd.openbis.dss.generic.shared.dto.DataSetInformation; @@ -51,7 +50,7 @@ public class FeatureStorageProcessor extends AbstractDelegatingStorageProcessor { private static final char DELIMITER = ';'; - private static final String LAYER_PREFIX = "<Layer="; + public static final String LAYER_PREFIX = "<Layer="; private final DataSource dataSource; @@ -206,11 +205,17 @@ public class FeatureStorageProcessor extends AbstractDelegatingStorageProcessor dataAccessObject = createDAO(); FeatureVectorUploader uploader = - new FeatureVectorUploader(dataAccessObject, ScreeningContainerDatasetInfo - .createScreeningDatasetInfo(dataSetInformation)); + new FeatureVectorUploader(dataAccessObject, + 
createScreeningDatasetInfo(dataSetInformation)); uploader.uploadFeatureVectors(fvecs); } + private ScreeningContainerDatasetInfo createScreeningDatasetInfo( + DataSetInformation dataSetInformation) + { + return ScreeningContainerDatasetInfo.createScreeningDatasetInfo(dataSetInformation); + } + protected IImagingUploadDAO createDAO() { return QueryTool.getQuery(dataSource, IImagingUploadDAO.class); diff --git a/screening/source/java/ch/systemsx/cisd/openbis/dss/etl/genedata/GenedataFeatureVectorMigrator.java b/screening/source/java/ch/systemsx/cisd/openbis/dss/etl/genedata/GenedataFeatureVectorMigrator.java new file mode 100644 index 00000000000..4d956f9fe71 --- /dev/null +++ b/screening/source/java/ch/systemsx/cisd/openbis/dss/etl/genedata/GenedataFeatureVectorMigrator.java @@ -0,0 +1,137 @@ +/* + * Copyright 2010 ETH Zuerich, CISD + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package ch.systemsx.cisd.openbis.dss.etl.genedata; + +import java.io.File; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Properties; + +import ch.systemsx.cisd.common.filesystem.FileUtilities; +import ch.systemsx.cisd.etlserver.DefaultStorageProcessor; +import ch.systemsx.cisd.openbis.dss.etl.ScreeningContainerDatasetInfo; +import ch.systemsx.cisd.openbis.dss.etl.dataaccess.IImagingUploadDAO; +import ch.systemsx.cisd.openbis.dss.etl.featurevector.AbstractFeatureVectorMigrator; +import ch.systemsx.cisd.openbis.dss.etl.featurevector.CanonicalFeatureVector; +import ch.systemsx.cisd.openbis.dss.etl.featurevector.FeatureVectorUploader; +import ch.systemsx.cisd.openbis.generic.shared.dto.SimpleDataSetInformationDTO; + +/** + * Imports Genedata feature vectors into the database. + * + * @author Chandrasekhar Ramakrishnan + */ +public class GenedataFeatureVectorMigrator extends AbstractFeatureVectorMigrator +{ + + private static final String GENEDATA_FEATURES_EXTENSION = ".stat"; + + /** + * @param properties + */ + public GenedataFeatureVectorMigrator(Properties properties) + { + super(properties); + } + + @Override + protected AbstractImageDbImporter createImporter(ScreeningContainerDatasetInfo dataSetInfo, + File fileToMigrate) + { + AbstractImageDbImporter importer; + importer = new ImporterGenedata(dao, dataSetInfo, fileToMigrate); + + return importer; + } + + @Override + protected AbstractMigrationDecision createMigrationDecision(File dataset) + { + AbstractMigrationDecision decision = new MigrationDecision(dataset, knownDataSetsByCode); + return decision; + } + + /** + * Helper class for deciding if a file needs to be migrated. 
+ * + * @author Chandrasekhar Ramakrishnan + */ + private static class MigrationDecision extends AbstractMigrationDecision + { + /** + * @param dataset + * @param knownDataSetsByCode + */ + public MigrationDecision(File dataset, + HashMap<String, SimpleDataSetInformationDTO> knownDataSetsByCode) + { + super(dataset, knownDataSetsByCode); + } + + @Override + protected File tryFileToMigrate() + { + File originalDataset = DefaultStorageProcessor.getOriginalDirectory(dataset); + File[] files = originalDataset.listFiles(); + + for (File file : files) + { + if (file.getName().endsWith(GENEDATA_FEATURES_EXTENSION)) + { + return file; + } + } + + return null; + } + } + + /** + * Helper class for importing genedata feature vector files. + * + * @author Chandrasekhar Ramakrishnan + */ + private static class ImporterGenedata extends AbstractImageDbImporter + { + + /** + * @param fileToMigrate + */ + private ImporterGenedata(IImagingUploadDAO dao, + ScreeningContainerDatasetInfo screeningDataSetInfo, File fileToMigrate) + { + super(dao, screeningDataSetInfo, fileToMigrate); + } + + @Override + public void doImport() + { + List<String> lines = FileUtilities.loadToStringList(fileToMigrate); + GenedataFormatToCanonicalFeatureVector convertor = + new GenedataFormatToCanonicalFeatureVector(lines, + FeatureStorageProcessor.LAYER_PREFIX); + ArrayList<CanonicalFeatureVector> fvecs = convertor.convert(); + + FeatureVectorUploader uploader = new FeatureVectorUploader(dao, screeningDataSetInfo); + uploader.uploadFeatureVectors(fvecs); + dao.commit(); + isSuccessful = true; + } + } + +} diff --git a/screening/source/java/ch/systemsx/cisd/openbis/dss/etl/featurevector/GenedataFormatToCanonicalFeatureVector.java b/screening/source/java/ch/systemsx/cisd/openbis/dss/etl/genedata/GenedataFormatToCanonicalFeatureVector.java similarity index 98% rename from
screening/source/java/ch/systemsx/cisd/openbis/dss/etl/genedata/GenedataFormatToCanonicalFeatureVector.java index 4db30d31dec..ae2a11c49a4 100644 --- a/screening/source/java/ch/systemsx/cisd/openbis/dss/etl/featurevector/GenedataFormatToCanonicalFeatureVector.java +++ b/screening/source/java/ch/systemsx/cisd/openbis/dss/etl/genedata/GenedataFormatToCanonicalFeatureVector.java @@ -14,7 +14,7 @@ * limitations under the License. */ -package ch.systemsx.cisd.openbis.dss.etl.featurevector; +package ch.systemsx.cisd.openbis.dss.etl.genedata; import java.util.ArrayList; import java.util.Collections; @@ -29,6 +29,7 @@ import ch.systemsx.cisd.common.exceptions.UserFailureException; import ch.systemsx.cisd.common.geometry.Point; import ch.systemsx.cisd.openbis.dss.etl.dataaccess.ImgFeatureDefDTO; import ch.systemsx.cisd.openbis.dss.etl.dataaccess.ImgFeatureValuesDTO; +import ch.systemsx.cisd.openbis.dss.etl.featurevector.CanonicalFeatureVector; /** * Converts currentFeature vectors from the Genedata currentFeature vector file format to diff --git a/screening/sourceTest/java/ch/systemsx/cisd/openbis/dss/etl/genedata/GenedataFormatToCanonicalFeatureVectorTest.java b/screening/sourceTest/java/ch/systemsx/cisd/openbis/dss/etl/genedata/GenedataFormatToCanonicalFeatureVectorTest.java index 00e57286d1c..2427bd798e6 100644 --- a/screening/sourceTest/java/ch/systemsx/cisd/openbis/dss/etl/genedata/GenedataFormatToCanonicalFeatureVectorTest.java +++ b/screening/sourceTest/java/ch/systemsx/cisd/openbis/dss/etl/genedata/GenedataFormatToCanonicalFeatureVectorTest.java @@ -28,7 +28,6 @@ import ch.systemsx.cisd.common.filesystem.FileUtilities; import ch.systemsx.cisd.openbis.dss.etl.dataaccess.ImgFeatureDefDTO; import ch.systemsx.cisd.openbis.dss.etl.dataaccess.ImgFeatureValuesDTO; import ch.systemsx.cisd.openbis.dss.etl.featurevector.CanonicalFeatureVector; -import ch.systemsx.cisd.openbis.dss.etl.featurevector.GenedataFormatToCanonicalFeatureVector; /** * Check that Genedata feature 
vectors can be converted to the canonical form. -- GitLab