From 23d1feb95a64d345b31529e07d08ac83ad2588ae Mon Sep 17 00:00:00 2001
From: felmer <felmer>
Date: Tue, 7 Dec 2010 08:05:35 +0000
Subject: [PATCH] LMS-1880 first version of plugin DataSetToSOFT

SVN: 19023
---
 .../cisd/dsu/dss/plugins/DataSetToSOFT.java   | 325 ++++++++++++++++++
 1 file changed, 325 insertions(+)
 create mode 100644 deep_sequencing_unit/source/java/ch/ethz/bsse/cisd/dsu/dss/plugins/DataSetToSOFT.java

diff --git a/deep_sequencing_unit/source/java/ch/ethz/bsse/cisd/dsu/dss/plugins/DataSetToSOFT.java b/deep_sequencing_unit/source/java/ch/ethz/bsse/cisd/dsu/dss/plugins/DataSetToSOFT.java
new file mode 100644
index 00000000000..a31a7ed0764
--- /dev/null
+++ b/deep_sequencing_unit/source/java/ch/ethz/bsse/cisd/dsu/dss/plugins/DataSetToSOFT.java
@@ -0,0 +1,325 @@
+/*
+ * Copyright 2010 ETH Zuerich, CISD
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package ch.ethz.bsse.cisd.dsu.dss.plugins;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+import javax.activation.DataHandler;
+import javax.activation.DataSource;
+import javax.mail.util.ByteArrayDataSource;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.log4j.Logger;
+
+import ch.systemsx.cisd.base.exceptions.CheckedExceptionTunnel;
+import ch.systemsx.cisd.common.exceptions.Status;
+import ch.systemsx.cisd.common.logging.LogCategory;
+import ch.systemsx.cisd.common.logging.LogFactory;
+import ch.systemsx.cisd.common.mail.EMailAddress;
+import ch.systemsx.cisd.common.utilities.MD5ChecksumCalculator;
+import ch.systemsx.cisd.common.utilities.Template;
+import ch.systemsx.cisd.openbis.dss.generic.server.plugins.tasks.IProcessingPluginTask;
+import ch.systemsx.cisd.openbis.dss.generic.server.plugins.tasks.ProcessingStatus;
+import ch.systemsx.cisd.openbis.dss.generic.shared.DataSetProcessingContext;
+import ch.systemsx.cisd.openbis.dss.generic.shared.IEncapsulatedOpenBISService;
+import ch.systemsx.cisd.openbis.dss.generic.shared.ServiceProvider;
+import ch.systemsx.cisd.openbis.generic.shared.basic.TechId;
+import ch.systemsx.cisd.openbis.generic.shared.basic.dto.ExternalData;
+import ch.systemsx.cisd.openbis.generic.shared.basic.dto.IEntityProperty;
+import ch.systemsx.cisd.openbis.generic.shared.basic.dto.ListSampleCriteria;
+import ch.systemsx.cisd.openbis.generic.shared.basic.dto.Sample;
+import ch.systemsx.cisd.openbis.generic.shared.dto.DatasetDescription;
+import ch.systemsx.cisd.openbis.generic.shared.dto.identifier.SampleIdentifier;
+import ch.systemsx.cisd.openbis.generic.shared.dto.identifier.SampleIdentifierFactory;
+
+/**
+ * Processing plugin for exporting meta data of a flow lane as a SOFT file.
+ *
+ * @author Franz-Josef Elmer
+ */
+public class DataSetToSOFT implements IProcessingPluginTask
+{
+    private static final String EXTERNAL_SAMPLE_NAME_PROPERTY = "EXTERNAL_SAMPLE_NAME";
+
+    private static final String EMPTY = "<<<NEED_TO_BE_FILLED>>>";
+
+    private static final Template SOFT_FILE_NAME_TEMPLATE = new Template(
+            "${flow-lane}_${external-sample-name}_SOFT.txt");
+
+    private static final Template E_MAIL_SUBJECT_TEMPLATE = new Template(
+    "SOFT file for ${external-sample-name}");
+    
+    private static final Template E_MAIL_CONTENT_TEMPLATE =
+            new Template("Dear User\n\n"
+                    + "Enclosed you will find the SOFT file for ${external-sample-name}.\n"
+                    + "Flow lane: ${flow-lane}\nData Set: ${data-set}");
+    
+    private static final class SOFTBuilder
+    {
+        private final StringBuilder builder = new StringBuilder();
+        
+        void addSample(Sample sample, String propertyTypeCode)
+        {
+            addLine('^', "SAMPLE", getProperty(sample, propertyTypeCode));
+        }
+        
+        void addSampleProperty(String key, Sample sample, String propertyTypeCode, Map<String, String> translation)
+        {
+            String property = getProperty(sample, propertyTypeCode);
+            String translatedProperty = translation.get(property);
+            addProperty(key, translatedProperty == null ? property : translatedProperty);
+        }
+        
+        void addSampleProperty(String key, Sample sample, String propertyTypeCode)
+        {
+            addProperty(key, getProperty(sample, propertyTypeCode));
+        }
+        
+        void addProperty(String key, String property)
+        {
+            addLine('!', "Sample_" + key, property);
+        }
+        
+        private void addLine(char prefix, String key, String value)
+        {
+            builder.append(prefix).append(key).append(" = ").append(value).append('\n');
+        }
+        
+        private String getProperty(Sample sample, String propertyTypeCode)
+        {
+            String property = tryToGetProperty(sample, propertyTypeCode);
+            return property == null ? EMPTY : property;
+        }
+
+        @Override
+        public String toString()
+        {
+            return builder.toString();
+        }
+    }
+    
+    private static final String tryToGetProperty(Sample sample, String propertyTypeCode)
+    {
+        List<IEntityProperty> properties = sample.getProperties();
+        for (IEntityProperty property : properties)
+        {
+            if (property.getPropertyType().getCode().equals(propertyTypeCode))
+            {
+                return property.tryGetAsString();
+            }
+        }
+        return null;
+    }
+        
+        
+    private static final Logger operationLog =
+        LogFactory.getLogger(LogCategory.OPERATION, DataSetToSOFT.class);
+
+    private static final long serialVersionUID = 1L;
+    private final File storeRoot;
+
+    private Map<String, String> translation;
+
+    public DataSetToSOFT(Properties properties, File storeRoot)
+    {
+        this.storeRoot = storeRoot;
+        translation = new HashMap<String, String>();
+        translation.put("GENOMIC_DNA", "genomic");
+        translation.put("FRAGMENTED_GENOMIC_DNA", "genomic");
+        translation.put("CHIP", "genomic");
+        translation.put("BISULFITE", "genomic");
+        translation.put("NON_GENOMIC", "non-genomic");
+        translation.put("SYNTHETIC", "synthetic");
+        translation.put("VIRAL_RNA", "viral RNA");
+        translation.put("SMALL_RNA", "other");
+        translation.put("TOTAL_RNA", "other");
+        translation.put("MRNA", "other");
+        translation.put("PROCESSED_DNA_LIBRARY", "<<<NEED_TO_BE_FILLED>>>");
+    }
+    
+    public ProcessingStatus process(List<DatasetDescription> datasets,
+            DataSetProcessingContext context)
+    {
+        EMailAddress address = new EMailAddress(context.getUserEmailOrNull());
+        IEncapsulatedOpenBISService service = ServiceProvider.getOpenBISService();
+        ProcessingStatus status = new ProcessingStatus();
+        for (DatasetDescription datasetDescription : datasets)
+        {
+            try
+            {
+                String dataSetCode = datasetDescription.getDatasetCode();
+                if (operationLog.isInfoEnabled())
+                {
+                    operationLog.info("Create SOFT file for data set " + dataSetCode);
+                }
+                ExternalData srfDataSet = service.tryGetDataSet(dataSetCode);
+                Sample flowLaneSample = getFlowLaneSample(srfDataSet);
+                Sample flowCellSample = getFlowCellSample(flowLaneSample);
+                Sample sequencingSample = getSequencingSample(flowLaneSample);
+                File srfFile = tryToFindSrfFile(datasetDescription);
+                if (srfFile == null)
+                {
+                    status.addDatasetStatus(datasetDescription,
+                            Status.createError("Data set " + dataSetCode + " has no srf file."));
+                    continue;
+                }
+                String checkSum = calculateCheckSum(srfFile);
+                
+                SOFTBuilder softBuilder = new SOFTBuilder();
+                softBuilder.addSample(sequencingSample, EXTERNAL_SAMPLE_NAME_PROPERTY);
+                softBuilder.addProperty("type", "SRA");
+                softBuilder.addSampleProperty("title", sequencingSample, EXTERNAL_SAMPLE_NAME_PROPERTY);
+                softBuilder.addSampleProperty("source_name", sequencingSample, "SAMPLE_SOURCE_NAME");
+                softBuilder.addSampleProperty("organism", sequencingSample, "NCBI_ORGANISM_TAXONOMY");
+                softBuilder.addSampleProperty("characteristics", sequencingSample, "SAMPLE_CHARACTERISTICS");
+                softBuilder.addSampleProperty("biomaterial_provider", sequencingSample, "CONTACT_PERSON_NAME");
+                softBuilder.addSampleProperty("molecule", sequencingSample, "SEQUENCING_APPLICATION");
+                softBuilder.addSampleProperty("extract_protocol", sequencingSample, "SAMPLE_EXTRACT_PROTOCOL");
+                softBuilder.addSampleProperty("data_processing", sequencingSample, "AMPLE_DATA_PROCESSING");
+                softBuilder.addSampleProperty("library_strategy", sequencingSample, "SEQUENCING_APPLICATION");
+                softBuilder.addSampleProperty("library_source", sequencingSample, "SAMPLE_KIND", translation);
+                softBuilder.addSampleProperty("library_selection", sequencingSample, "SAMPLE_KIND");
+                softBuilder.addSampleProperty("instrument_model", flowCellSample, "GENOME_ANALYZER");
+                softBuilder.addProperty("raw_file_1", srfFile.getName());
+                softBuilder.addProperty("raw_file_type_1", "srf");
+                softBuilder.addProperty("file_checksum_1", checkSum);
+                
+                String subject = createSubject(sequencingSample);
+                String content = createContent(sequencingSample, flowLaneSample, srfDataSet);
+                String fileName = createSoftFileName(sequencingSample, flowLaneSample);
+                DataSource dataSource = createDataSource(softBuilder.toString());
+                context.getMailClient().sendEmailMessageWithAttachment(subject, content, fileName,
+                        new DataHandler(dataSource), null, null, address);
+                status.addDatasetStatus(datasetDescription, Status.OK);
+            } catch (Exception ex)
+            {
+                status.addDatasetStatus(datasetDescription, Status.createError("Exception occured: " + ex));
+                operationLog.error("Exception occured while processing " + datasetDescription, ex);
+            }
+        }
+        return status;
+    }
+    
+    private String createSubject(Sample sequencingSample)
+    {
+        Template template = E_MAIL_SUBJECT_TEMPLATE.createFreshCopy();
+        bindExternalSampleName(template, sequencingSample);
+        return template.createText();
+    }
+
+    private String createContent(Sample sequencingSample, Sample flowLaneSample, ExternalData dataSet)
+    {
+        Template template = E_MAIL_CONTENT_TEMPLATE.createFreshCopy();
+        bindExternalSampleName(template, sequencingSample);
+        template.bind("flow-lane", flowLaneSample.getIdentifier());
+        template.bind("data-set", dataSet.getCode());
+        return template.createText();
+    }
+    
+    private String createSoftFileName(Sample sequencingSample, Sample flowLaneSample)
+    {
+        Template template = SOFT_FILE_NAME_TEMPLATE.createFreshCopy();
+        bindExternalSampleName(template, sequencingSample);
+        template.bind("flow-lane", flowLaneSample.getCode().replace(':', '-'));
+        return template.createText();
+    }
+    
+    private void bindExternalSampleName(Template template, Sample sequencingSample)
+    {
+        String externalSampleName = tryToGetProperty(sequencingSample, EXTERNAL_SAMPLE_NAME_PROPERTY);
+        template.bind("external-sample-name", externalSampleName);
+    }
+    
+    private String calculateCheckSum(File srfFile)
+    {
+        String checkSum;
+        try
+        {
+            checkSum = MD5ChecksumCalculator.calculate(FileUtils.readFileToByteArray(srfFile));
+        } catch (IOException ex)
+        {
+            throw CheckedExceptionTunnel.wrapIfNecessary(ex);
+        }
+        return checkSum;
+    }
+    
+    private File tryToFindSrfFile(DatasetDescription datasetDescription)
+    {
+        File root = new File(storeRoot, datasetDescription.getDataSetLocation());
+        return tryToFindSrfFile(root);
+    }
+    
+    private File tryToFindSrfFile(File file)
+    {
+        if (file.isFile() && file.getName().endsWith(".srf"))
+        {
+            return file;
+        }
+        if (file.isDirectory())
+        {
+            File[] files = file.listFiles();
+            for (File child : files)
+            {
+                File srfFile = tryToFindSrfFile(child);
+                if (srfFile != null)
+                {
+                    return srfFile;
+                }
+            }
+        }
+        return null;
+    }
+    
+    private Sample getFlowLaneSample(ExternalData dataSet)
+    {
+        IEncapsulatedOpenBISService service = ServiceProvider.getOpenBISService();
+        SampleIdentifier identifier = SampleIdentifierFactory.parse(dataSet.getSampleIdentifier());
+        return service.tryGetSampleWithExperiment(identifier);
+    }
+    
+    private Sample getFlowCellSample(Sample flowLaneSample)
+    {
+        IEncapsulatedOpenBISService service = ServiceProvider.getOpenBISService();
+        SampleIdentifier identifier = SampleIdentifierFactory.parse(flowLaneSample.getContainer().getIdentifier());
+        return service.tryGetSampleWithExperiment(identifier);
+    }
+    
+    private Sample getSequencingSample(Sample flowLaneSample)
+    {
+        IEncapsulatedOpenBISService service = ServiceProvider.getOpenBISService();
+        List<Sample> parents = service.listSamples(ListSampleCriteria.createForChild(new TechId(flowLaneSample.getId())));
+        return parents.get(0);
+    }
+
+    private DataSource createDataSource(final String softData)
+    {
+        try
+        {
+            return new ByteArrayDataSource(softData, "text/plain");
+        } catch (IOException ex)
+        {
+            throw CheckedExceptionTunnel.wrapIfNecessary(ex);
+        }
+    }
+    
+
+}
-- 
GitLab