From 0b0cb3c41bd69f0f70a5fc81cd80273ac4a0a307 Mon Sep 17 00:00:00 2001 From: felmer <felmer> Date: Tue, 13 Oct 2009 06:04:58 +0000 Subject: [PATCH] LMS-1218 Few changes: Metafile name contains flow line sample code and property EXTERNAL_SAMPLE_NAME, Flow Line Data Set name contains group code and sample code with ':' instead of '_'. Flow line ETL thread examples added to service.properties SVN: 12906 --- .../dist/etc/service.properties | 24 ++++++- deep_sequencing_unit/etc/service.properties | 27 +++++++- .../bsse/cisd/dsu/dss/FlowLineFeeder.java | 30 ++++++--- .../bsse/cisd/dsu/dss/FlowLineFeederTest.java | 62 +++++++++++++------ 4 files changed, 115 insertions(+), 28 deletions(-) diff --git a/deep_sequencing_unit/dist/etc/service.properties b/deep_sequencing_unit/dist/etc/service.properties index fdf1278cd0c..e33bef07dd7 100644 --- a/deep_sequencing_unit/dist/etc/service.properties +++ b/deep_sequencing_unit/dist/etc/service.properties @@ -86,7 +86,7 @@ data-set-file-name-entity-separator = _ # Comma separated names of processing threads. Each thread should have configuration properties prefixed with its name. # E.g. 'code-extractor' property for the thread 'my-etl' should be specified as 'my-etl.code-extractor' -inputs = flow-cell +inputs = flow-cell, fl1, fl2 # --------------------------------------------------------------------------- # 'flow-cell' thread configuration @@ -126,3 +126,25 @@ flow-cell.storage-processor = ch.ethz.bsse.cisd.dsu.dss.StorageProcessor flow-cell.storage-processor.processor = ch.systemsx.cisd.etlserver.DefaultStorageProcessor # Template of flow-line drop boxes. {0} is the place holder for flow line number flow-cell.storage-processor.flow-line-drop-box-template = data/drop-box-{0} +flow-cell.storage-processor.transfer.BSSE = data/drop-box-bsse + +# -------- Flow Line 1 -------------------------------------- +fl1.incoming-dir = data/drop-box-1 +fl1.data-set-info-extractor = ch.systemsx.cisd.etlserver.DefaultDataSetInfoExtractor +fl1.data-set-info-extractor.entity-separator = ${data-set-file-name-entity-separator} +fl1.data-set-info-extractor.index-of-group-code = 0 +fl1.type-extractor = ch.systemsx.cisd.etlserver.SimpleTypeExtractor +fl1.type-extractor.file-format-type = PROPRIETARY +fl1.type-extractor.data-set-type = FLOW_LINE +fl1.storage-processor = ch.systemsx.cisd.etlserver.DefaultStorageProcessor + +# -------- Flow Line 2 -------------------------------------- +fl2.incoming-dir = data/drop-box-2 +fl2.data-set-info-extractor = ch.systemsx.cisd.etlserver.DefaultDataSetInfoExtractor +fl2.data-set-info-extractor.entity-separator = ${data-set-file-name-entity-separator} +fl2.data-set-info-extractor.index-of-group-code = 0 +fl2.type-extractor = ch.systemsx.cisd.etlserver.SimpleTypeExtractor +fl2.type-extractor.file-format-type = PROPRIETARY +fl2.type-extractor.data-set-type = FLOW_LINE +fl2.storage-processor = ch.systemsx.cisd.etlserver.DefaultStorageProcessor + diff --git a/deep_sequencing_unit/etc/service.properties b/deep_sequencing_unit/etc/service.properties index 9a0d8770e48..0c30eca40dc 100644 --- a/deep_sequencing_unit/etc/service.properties +++ b/deep_sequencing_unit/etc/service.properties @@ -86,7 +86,7 @@ data-set-file-name-entity-separator = _ # Comma separated names of processing threads. Each thread should have configuration properties prefixed with its name. # E.g. 'code-extractor' property for the thread 'my-etl' should be specified as 'my-etl.code-extractor' -inputs = flow-cell +inputs = flow-cell, fl1, fl2 # --------------------------------------------------------------------------- # 'flow-cell' thread configuration @@ -118,10 +118,33 @@ flow-cell.data-set-info-extractor.group-code = TEST flow-cell.type-extractor = ch.systemsx.cisd.etlserver.SimpleTypeExtractor flow-cell.type-extractor.file-format-type = PROPRIETARY flow-cell.type-extractor.locator-type = RELATIVE_LOCATION -flow-cell.type-extractor.data-set-type = FLOW_LINE +flow-cell.type-extractor.data-set-type = FLOW_CELL flow-cell.type-extractor.is-measured = true # The storage processor (IStorageProcessor implementation) flow-cell.storage-processor = ch.ethz.bsse.cisd.dsu.dss.StorageProcessor flow-cell.storage-processor.processor = ch.systemsx.cisd.etlserver.DefaultStorageProcessor flow-cell.storage-processor.flow-line-drop-box-template = targets/playground/drop-box-{0} +flow-cell.storage-processor.entity-separator = ${data-set-file-name-entity-separator} +flow-cell.storage-processor.transfer.FMI = targets/playground/drop-box-fmi + +# -------- Flow Line 1 -------------------------------------- +fl1.incoming-dir = targets/playground/drop-box-1 +fl1.data-set-info-extractor = ch.systemsx.cisd.etlserver.DefaultDataSetInfoExtractor +fl1.data-set-info-extractor.entity-separator = ${data-set-file-name-entity-separator} +fl1.data-set-info-extractor.index-of-group-code = 0 +fl1.type-extractor = ch.systemsx.cisd.etlserver.SimpleTypeExtractor +fl1.type-extractor.file-format-type = PROPRIETARY +fl1.type-extractor.data-set-type = FLOW_LINE +fl1.storage-processor = ch.systemsx.cisd.etlserver.DefaultStorageProcessor + +# -------- Flow Line 2 -------------------------------------- +fl2.incoming-dir = targets/playground/drop-box-2 +fl2.data-set-info-extractor = ch.systemsx.cisd.etlserver.DefaultDataSetInfoExtractor +fl2.data-set-info-extractor.entity-separator = ${data-set-file-name-entity-separator} +fl2.data-set-info-extractor.index-of-group-code = 0 +fl2.type-extractor = ch.systemsx.cisd.etlserver.SimpleTypeExtractor +fl2.type-extractor.file-format-type = PROPRIETARY +fl2.type-extractor.data-set-type = FLOW_LINE +fl2.storage-processor = ch.systemsx.cisd.etlserver.DefaultStorageProcessor + diff --git a/deep_sequencing_unit/source/java/ch/ethz/bsse/cisd/dsu/dss/FlowLineFeeder.java b/deep_sequencing_unit/source/java/ch/ethz/bsse/cisd/dsu/dss/FlowLineFeeder.java index 4b01c89a826..49b63524f90 100644 --- a/deep_sequencing_unit/source/java/ch/ethz/bsse/cisd/dsu/dss/FlowLineFeeder.java +++ b/deep_sequencing_unit/source/java/ch/ethz/bsse/cisd/dsu/dss/FlowLineFeeder.java @@ -62,10 +62,13 @@ import ch.systemsx.cisd.openbis.generic.shared.dto.identifier.SampleIdentifierFa */ class FlowLineFeeder implements IPostRegistrationDatasetHandler { + static final String META_DATA_FILE_TYPE = ".tsv"; static final String TRANSFER_PREFIX = "transfer."; static final String AFFILIATION_KEY = "AFFILIATION"; - static final String META_DATA_FILE_NAME = "meta-data.tsv"; + static final String EXTERNAL_SAMPLE_NAME_KEY = "EXTERNAL_SAMPLE_NAME"; static final String FLOW_LINE_DROP_BOX_TEMPLATE = "flow-line-drop-box-template"; + static final String ENTITY_SEPARATOR_KEY = "entity-separator"; + static final String DEFAULT_ENTITY_SEPARATOR = "_"; static final String FILE_TYPE = ".srf"; private final static Logger operationLog = @@ -73,6 +76,7 @@ class FlowLineFeeder implements IPostRegistrationDatasetHandler private final IEncapsulatedOpenBISService service; private final MessageFormat flowLineDropBoxTemplate; + private final String entitySepaparator; private final IImmutableCopier copier; private final IFileOperations fileOperations; private final List<File> createdFiles = new ArrayList<File>(); @@ -84,6 +88,7 @@ class FlowLineFeeder implements IPostRegistrationDatasetHandler flowLineDropBoxTemplate = new MessageFormat(PropertyUtils.getMandatoryProperty(properties, FLOW_LINE_DROP_BOX_TEMPLATE)); + entitySepaparator = properties.getProperty(ENTITY_SEPARATOR_KEY, DEFAULT_ENTITY_SEPARATOR); copier = FastRecursiveHardLinkMaker.tryCreate(TimingParameters.getDefaultParameters()); fileOperations = FileOperations.getInstance(); Properties transferDropBoxMapping = @@ -112,8 +117,11 @@ class FlowLineFeeder implements IPostRegistrationDatasetHandler for (File file : files) { String flowLine = extractFlowLine(file); + Sample flowLineSample = flowLineSampleMap.get(flowLine); File dropBox = createDropBoxFile(flowLine); - String fileName = flowcellID + "_" + flowLine; + String fileName = + flowLineSample.getGroup().getCode() + entitySepaparator + flowcellID + + SampleIdentifier.CONTAINED_SAMPLE_CODE_SEPARARTOR_STRING + flowLine; File flowLineDataSet = new File(dropBox, fileName); if (flowLineDataSet.exists()) { @@ -128,7 +136,7 @@ class FlowLineFeeder implements IPostRegistrationDatasetHandler + flowLineDataSet.getAbsolutePath() + "'."); } createHartLink(file, flowLineDataSet); - createMetaDataFileAndHartLinkInTransferDropBox(flowLineDataSet, flowLineSampleMap, flowLine); + createMetaDataFileAndHartLinkInTransferDropBox(flowLineDataSet, flowLineSample, flowLine); File markerFile = new File(dropBox, Constants.IS_FINISHED_PREFIX + fileName); createdFiles.add(markerFile); FileUtilities.writeToFile(markerFile, ""); @@ -162,9 +170,8 @@ class FlowLineFeeder implements IPostRegistrationDatasetHandler } private void createMetaDataFileAndHartLinkInTransferDropBox(File flowLineDataSet, - Map<String, Sample> flowLineSampleMap, String flowLine) + Sample flowLineSample, String flowLine) { - Sample flowLineSample = flowLineSampleMap.get(flowLine); if (flowLineSample == null) { throw new UserFailureException("No flow line sample for flow line " + flowLine + " exists"); @@ -176,17 +183,26 @@ class FlowLineFeeder implements IPostRegistrationDatasetHandler SampleIdentifier identifier = SampleIdentifierFactory.parse(flowLineSample.getIdentifier()); IEntityProperty[] properties = service.getPropertiesOfTopSampleRegisteredFor(identifier); File dropBox = null; + String externalSampleName = null; for (IEntityProperty property : properties) { PropertyType propertyType = property.getPropertyType(); String value = property.tryGetAsString(); addLine(builder, propertyType.getLabel(), value); - if (propertyType.getCode().equals(AFFILIATION_KEY)) + String code = propertyType.getCode(); + if (code.equals(AFFILIATION_KEY)) { dropBox = transferDropBoxes.get(value); } + if (code.equals(EXTERNAL_SAMPLE_NAME_KEY)) + { + externalSampleName = value; + } } - FileUtilities.writeToFile(new File(flowLineDataSet, META_DATA_FILE_NAME), builder.toString()); + String metaFileName = + flowLineSample.getCode() + + (externalSampleName == null ? "" : "_" + externalSampleName) + META_DATA_FILE_TYPE; + FileUtilities.writeToFile(new File(flowLineDataSet, metaFileName), builder.toString()); if (dropBox != null) { createHartLink(flowLineDataSet, dropBox); diff --git a/deep_sequencing_unit/sourceTest/java/ch/ethz/bsse/cisd/dsu/dss/FlowLineFeederTest.java b/deep_sequencing_unit/sourceTest/java/ch/ethz/bsse/cisd/dsu/dss/FlowLineFeederTest.java index 52bbe053e41..875f6f50247 100644 --- a/deep_sequencing_unit/sourceTest/java/ch/ethz/bsse/cisd/dsu/dss/FlowLineFeederTest.java +++ b/deep_sequencing_unit/sourceTest/java/ch/ethz/bsse/cisd/dsu/dss/FlowLineFeederTest.java @@ -41,6 +41,7 @@ import ch.systemsx.cisd.openbis.dss.generic.shared.IEncapsulatedOpenBISService; import ch.systemsx.cisd.openbis.dss.generic.shared.dto.DataSetInformation; import ch.systemsx.cisd.openbis.generic.client.web.client.exception.UserFailureException; import ch.systemsx.cisd.openbis.generic.shared.basic.dto.GenericValueEntityProperty; +import ch.systemsx.cisd.openbis.generic.shared.basic.dto.Group; import ch.systemsx.cisd.openbis.generic.shared.basic.dto.ListSampleCriteria; import ch.systemsx.cisd.openbis.generic.shared.basic.dto.Person; import ch.systemsx.cisd.openbis.generic.shared.basic.dto.PropertyType; @@ -56,6 +57,7 @@ import ch.systemsx.cisd.openbis.generic.shared.dto.identifier.SampleIdentifierFa public class FlowLineFeederTest extends AbstractFileSystemTestCase { private static final String AFFILIATION = "fmi"; + private static final String EXTERNAL_SAMPLE_NAME = "ext23"; private static final String TRANSFER_DROP_BOX = "transfer-drop-box"; private static final Sample EXAMPLE_FLOW_CELL_SAMPLE = createFlowCellSample(); private static final DataSetInformation EXAMPLE_DATA_SET_INFO = createDataSetInfo(); @@ -134,7 +136,7 @@ public class FlowLineFeederTest extends AbstractFileSystemTestCase @Test void testMissingDropBox() { - File flowCell = new File(workingDirectory, "abc"); + File flowCell = new File(workingDirectory, SAMPLE_CODE); assertEquals(true, flowCell.mkdir()); FileUtilities.writeToFile(new File(flowCell, "s_3.srf"), "hello flow line 3"); prepareLoadFlowCellSample(EXAMPLE_FLOW_CELL_SAMPLE); @@ -155,7 +157,7 @@ public class FlowLineFeederTest extends AbstractFileSystemTestCase @Test public void testUnkownFlowCell() { - File flowCell = new File(workingDirectory, "abc"); + File flowCell = new File(workingDirectory, SAMPLE_CODE); prepareLoadFlowCellSample(null); try @@ -174,7 +176,7 @@ public class FlowLineFeederTest extends AbstractFileSystemTestCase @Test public void testHappyCase() { - File flowCell = new File(workingDirectory, "abc"); + File flowCell = new File(workingDirectory, SAMPLE_CODE); assertEquals(true, flowCell.mkdir()); File logs = new File(flowCell, "logs"); assertEquals(true, logs.mkdir()); @@ -198,21 +200,24 @@ public class FlowLineFeederTest extends AbstractFileSystemTestCase File[] transferedFiles = transferDropBox.listFiles(); assertEquals(1, transferedFiles.length); - assertEquals("abc_2", transferedFiles[0].getName()); - getFile(transferedFiles[0], FlowLineFeeder.META_DATA_FILE_NAME); + String sampleName = + SAMPLE_CODE + SampleIdentifier.CONTAINED_SAMPLE_CODE_SEPARARTOR_STRING + "2"; + assertEquals("G2_" + sampleName, transferedFiles[0].getName()); + File metaFile = getFile(transferedFiles[0], FlowLineFeeder.META_DATA_FILE_TYPE); + assertEquals(sampleName + "_" + EXTERNAL_SAMPLE_NAME + FlowLineFeeder.META_DATA_FILE_TYPE, + metaFile.getName()); assertHardLinkOnSameFile(originalFlowLine2, getFile(transferedFiles[0], "2.srf")); context.assertIsSatisfied(); } - private File getFile(File folder, final String fileName) + private File getFile(File folder, final String fileNameExtension) { File[] files = folder.listFiles(new FilenameFilter() { - public boolean accept(File dir, String name) { - return fileName.equals(name); + return name.endsWith(fileNameExtension); } }); assertEquals(1, files.length); @@ -278,23 +283,37 @@ public class FlowLineFeederTest extends AbstractFileSystemTestCase one(service).getPropertiesOfTopSampleRegisteredFor(identifier); if (sample.getSubCode().equals("2")) { - GenericValueEntityProperty p = new GenericValueEntityProperty(); - p.setValue(AFFILIATION); - PropertyType propertyType = new PropertyType(); - propertyType.setCode(FlowLineFeeder.AFFILIATION_KEY); - propertyType.setLabel(FlowLineFeeder.AFFILIATION_KEY.toLowerCase()); - p.setPropertyType(propertyType); - will(returnValue(new GenericValueEntityProperty[] {p})); + GenericValueEntityProperty p1 = + createProperty(FlowLineFeeder.AFFILIATION_KEY, AFFILIATION); + GenericValueEntityProperty p2 = + createProperty(FlowLineFeeder.EXTERNAL_SAMPLE_NAME_KEY, + EXTERNAL_SAMPLE_NAME); + will(returnValue(new GenericValueEntityProperty[] {p1, p2})); } } } + + private GenericValueEntityProperty createProperty(String key, String value) + { + GenericValueEntityProperty p = new GenericValueEntityProperty(); + p.setValue(value); + PropertyType propertyType = new PropertyType(); + propertyType.setCode(key); + propertyType.setLabel(key.toLowerCase()); + p.setPropertyType(propertyType); + return p; + } }); } private void checkFlowLineDataSet(File originalFlowLine, String flowLineNumber) { File dropBox = new File(workingDirectory, DROP_BOX_PREFIX + flowLineNumber); - String fileName = "abc_" + flowLineNumber; + String flowLineSampleCode = + SAMPLE_CODE + SampleIdentifier.CONTAINED_SAMPLE_CODE_SEPARARTOR_STRING + + flowLineNumber; + String fileName = + "G" + flowLineNumber + FlowLineFeeder.DEFAULT_ENTITY_SEPARATOR + flowLineSampleCode; File ds = new File(dropBox, fileName); assertEquals(true, ds.isDirectory()); @@ -303,7 +322,10 @@ public class FlowLineFeederTest extends AbstractFileSystemTestCase assertEquals(FileUtilities.loadToString(originalFlowLine), FileUtilities .loadToString(flowLine)); assertHardLinkOnSameFile(originalFlowLine, flowLine); - assertEquals(true, new File(ds, FlowLineFeeder.META_DATA_FILE_NAME).exists()); + String metaDataFileName = + flowLineSampleCode + (flowLineNumber.equals("2") ? "_" + EXTERNAL_SAMPLE_NAME : "") + + FlowLineFeeder.META_DATA_FILE_TYPE; + assertEquals(true, new File(ds, metaDataFileName).exists()); assertEquals(true, new File(dropBox, Constants.IS_FINISHED_PREFIX + fileName).exists()); } @@ -318,7 +340,11 @@ public class FlowLineFeederTest extends AbstractFileSystemTestCase private Sample createFlowLineSample(int flowLineNumber) { Sample sample = new Sample(); - sample.setCode(SAMPLE_CODE); + sample.setCode(SAMPLE_CODE + SampleIdentifier.CONTAINED_SAMPLE_CODE_SEPARARTOR_STRING + + flowLineNumber); + Group group = new Group(); + group.setCode("G" + flowLineNumber); + sample.setGroup(group); sample.setSubCode(Integer.toString(flowLineNumber)); sample.setGeneratedFrom(EXAMPLE_FLOW_CELL_SAMPLE); Person registrator = new Person(); -- GitLab