Skip to content
Snippets Groups Projects
Commit ba2fd2ab authored by tpylak's avatar tpylak
Browse files

LMS-1192 BatchDataSetHandler needs to support data sets without sample

SVN: 13298
parent 2727102c
No related branches found
No related tags found
No related merge requests found
...@@ -17,7 +17,7 @@ ...@@ -17,7 +17,7 @@
package ch.systemsx.cisd.yeastx.etl; package ch.systemsx.cisd.yeastx.etl;
import java.io.File; import java.io.File;
import java.util.Collections; import java.util.Arrays;
import java.util.Properties; import java.util.Properties;
import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.StringUtils;
...@@ -28,6 +28,7 @@ import ch.systemsx.cisd.common.utilities.ExtendedProperties; ...@@ -28,6 +28,7 @@ import ch.systemsx.cisd.common.utilities.ExtendedProperties;
import ch.systemsx.cisd.etlserver.IDataSetInfoExtractor; import ch.systemsx.cisd.etlserver.IDataSetInfoExtractor;
import ch.systemsx.cisd.openbis.dss.generic.shared.IEncapsulatedOpenBISService; import ch.systemsx.cisd.openbis.dss.generic.shared.IEncapsulatedOpenBISService;
import ch.systemsx.cisd.openbis.dss.generic.shared.dto.DataSetInformation; import ch.systemsx.cisd.openbis.dss.generic.shared.dto.DataSetInformation;
import ch.systemsx.cisd.openbis.generic.shared.dto.identifier.ExperimentIdentifier;
/** /**
* @author Tomasz Pylak * @author Tomasz Pylak
...@@ -56,14 +57,17 @@ public class BatchDataSetInfoExtractor implements IDataSetInfoExtractor ...@@ -56,14 +57,17 @@ public class BatchDataSetInfoExtractor implements IDataSetInfoExtractor
DataSetInformationYeastX info = new DataSetInformationYeastX(); DataSetInformationYeastX info = new DataSetInformationYeastX();
info.setComplete(true); info.setComplete(true);
info.setDataSetProperties(plainInfo.getProperties()); info.setDataSetProperties(plainInfo.getProperties());
String sampleCode = getSampleCode(plainInfo, openbisService, log); setSampleOrExperiment(openbisService, log, plainInfo, info);
info.setSampleCode(sampleCode);
info.setGroupCode(plainInfo.getGroupCode()); info.setGroupCode(plainInfo.getGroupCode());
MLConversionType conversion = getConversion(plainInfo.getConversion()); MLConversionType conversion = getConversion(plainInfo.getConversion());
info.setConversion(conversion); info.setConversion(conversion);
if (StringUtils.isNotBlank(plainInfo.getParentDataSetCode())) String parentDataSetCodes = plainInfo.getParentDataSetCodes();
if (StringUtils.isNotBlank(parentDataSetCodes))
{ {
info.setParentDataSetCodes(Collections.singletonList(plainInfo.getParentDataSetCode())); String[] parentCodes =
parentDataSetCodes
.split(DataSetMappingInformation.PARENT_DATASETS_SEPARATOR);
info.setParentDataSetCodes(Arrays.asList(parentCodes));
} }
fileNameDecorator.enrich(info, incomingDataSetPath); fileNameDecorator.enrich(info, incomingDataSetPath);
return info; return info;
...@@ -75,6 +79,29 @@ public class BatchDataSetInfoExtractor implements IDataSetInfoExtractor ...@@ -75,6 +79,29 @@ public class BatchDataSetInfoExtractor implements IDataSetInfoExtractor
} }
} }
/**
 * Connects the data set described by <var>mapping</var> to a sample if a sample code can be
 * figured out, otherwise directly to an experiment.
 *
 * @param openbisService service handed over to the {@link DatasetMappingResolver}
 * @param log log passed on to the sample code resolution
 * @param mapping the mapping entry of the processed file
 * @param info the data set information which gets the sample code or the experiment identifier
 * @throws UserFailureException if neither a sample code nor an experiment identifier could be
 *             derived from the mapping
 */
private void setSampleOrExperiment(IEncapsulatedOpenBISService openbisService, LogUtils log,
        DataSetMappingInformation mapping, DataSetInformationYeastX info)
{
    final String resolvedSampleCode =
            new DatasetMappingResolver(properties, openbisService).tryFigureSampleCode(
                    mapping, log);
    if (resolvedSampleCode != null)
    {
        // a sample is known, no need to look at the experiment columns
        info.setSampleCode(resolvedSampleCode);
        return;
    }

    // no sample: the data set has to be attached to the experiment directly
    final ExperimentIdentifier identifierOrNull =
            DatasetMappingResolver.tryFigureExperimentIdentifier(mapping);
    if (identifierOrNull == null)
    {
        throw new UserFailureException(
                "Both sample and experiment are not provided for the file "
                        + mapping.getFileName());
    }
    info.setExperimentIdentifier(identifierOrNull);
}
private static MLConversionType getConversion(String conversion) private static MLConversionType getConversion(String conversion)
{ {
MLConversionType conversionType = MLConversionType.tryCreate(conversion); MLConversionType conversionType = MLConversionType.tryCreate(conversion);
...@@ -85,19 +112,4 @@ public class BatchDataSetInfoExtractor implements IDataSetInfoExtractor ...@@ -85,19 +112,4 @@ public class BatchDataSetInfoExtractor implements IDataSetInfoExtractor
} }
return conversionType; return conversionType;
} }
/**
 * Figures out the sample code for the specified mapping entry.
 *
 * @return the sample code, never <code>null</code>
 * @throws UserFailureException if the resolver does not deliver a sample code
 */
private String getSampleCode(DataSetMappingInformation mapping,
        IEncapsulatedOpenBISService openbisService, LogUtils log)
{
    final DatasetMappingResolver resolver =
            new DatasetMappingResolver(properties, openbisService);
    final String resolvedCode = resolver.tryFigureSampleCode(mapping, log);
    if (resolvedCode != null)
    {
        return resolvedCode;
    }
    // should not happen, the dataset handler should skip datasets with incorrect mapping
    throw UserFailureException.fromTemplate("Cannot find a sample for the file '%s'.",
            mapping.getFileName());
}
} }
...@@ -31,6 +31,8 @@ import ch.systemsx.cisd.openbis.generic.shared.dto.NewProperty; ...@@ -31,6 +31,8 @@ import ch.systemsx.cisd.openbis.generic.shared.dto.NewProperty;
*/ */
public class DataSetMappingInformation public class DataSetMappingInformation
{ {
public static final String PARENT_DATASETS_SEPARATOR = ",";
private String fileName; private String fileName;
private String sampleCodeOrLabel; private String sampleCodeOrLabel;
...@@ -42,8 +44,8 @@ public class DataSetMappingInformation ...@@ -42,8 +44,8 @@ public class DataSetMappingInformation
private String groupCode; private String groupCode;
private String conversion; private String conversion;
private String parentDataSetCode; private String parentDataSetCodes;
private List<NewProperty> properties; private List<NewProperty> properties;
...@@ -63,24 +65,24 @@ public class DataSetMappingInformation ...@@ -63,24 +65,24 @@ public class DataSetMappingInformation
return sampleCodeOrLabel; return sampleCodeOrLabel;
} }
@BeanProperty(label = "sample") @BeanProperty(label = "sample", optional = true)
public void setSampleCodeOrLabel(String sampleCodeOrLabel) public void setSampleCodeOrLabel(String sampleCodeOrLabel)
{ {
this.sampleCodeOrLabel = StringUtils.trimToNull(sampleCodeOrLabel); this.sampleCodeOrLabel = StringUtils.trimToNull(sampleCodeOrLabel);
} }
/** /**
* Returns the code of the parent data set, if any. * Returns the codes of the parent data sets, separated by {@link #PARENT_DATASETS_SEPARATOR}.
*/ */
public final String getParentDataSetCode() public final String getParentDataSetCodes()
{ {
return parentDataSetCode; return parentDataSetCodes;
} }
@BeanProperty(label="parent", optional = true) @BeanProperty(label = "parent", optional = true)
public final void setParentDataSetCode(String parentCode) public final void setParentDataSetCode(String parentCodes)
{ {
this.parentDataSetCode = parentCode; this.parentDataSetCodes = parentCodes;
} }
public String getExperimentName() public String getExperimentName()
......
...@@ -25,8 +25,10 @@ import ch.systemsx.cisd.common.collections.CollectionUtils; ...@@ -25,8 +25,10 @@ import ch.systemsx.cisd.common.collections.CollectionUtils;
import ch.systemsx.cisd.common.exceptions.EnvironmentFailureException; import ch.systemsx.cisd.common.exceptions.EnvironmentFailureException;
import ch.systemsx.cisd.common.exceptions.UserFailureException; import ch.systemsx.cisd.common.exceptions.UserFailureException;
import ch.systemsx.cisd.openbis.dss.generic.shared.IEncapsulatedOpenBISService; import ch.systemsx.cisd.openbis.dss.generic.shared.IEncapsulatedOpenBISService;
import ch.systemsx.cisd.openbis.generic.shared.basic.dto.Experiment;
import ch.systemsx.cisd.openbis.generic.shared.basic.dto.Sample; import ch.systemsx.cisd.openbis.generic.shared.basic.dto.Sample;
import ch.systemsx.cisd.openbis.generic.shared.dto.ListSamplesByPropertyCriteria; import ch.systemsx.cisd.openbis.generic.shared.dto.ListSamplesByPropertyCriteria;
import ch.systemsx.cisd.openbis.generic.shared.dto.identifier.ExperimentIdentifier;
import ch.systemsx.cisd.openbis.generic.shared.dto.identifier.GroupIdentifier; import ch.systemsx.cisd.openbis.generic.shared.dto.identifier.GroupIdentifier;
import ch.systemsx.cisd.openbis.generic.shared.dto.identifier.LocalExperimentIdentifier; import ch.systemsx.cisd.openbis.generic.shared.dto.identifier.LocalExperimentIdentifier;
import ch.systemsx.cisd.openbis.generic.shared.dto.identifier.SampleIdentifier; import ch.systemsx.cisd.openbis.generic.shared.dto.identifier.SampleIdentifier;
...@@ -106,6 +108,10 @@ class DatasetMappingResolver ...@@ -106,6 +108,10 @@ class DatasetMappingResolver
public String tryFigureSampleCode(DataSetMappingInformation mapping, LogUtils log) public String tryFigureSampleCode(DataSetMappingInformation mapping, LogUtils log)
{ {
String sampleCodeOrLabel = mapping.getSampleCodeOrLabel(); String sampleCodeOrLabel = mapping.getSampleCodeOrLabel();
if (sampleCodeOrLabel == null)
{
return null;
}
if (samplePropertyCodeOrNull == null) if (samplePropertyCodeOrNull == null)
{ {
return sampleCodeOrLabel; return sampleCodeOrLabel;
...@@ -204,13 +210,70 @@ class DatasetMappingResolver ...@@ -204,13 +210,70 @@ class DatasetMappingResolver
{ {
return false; return false;
} }
if (isConversionColumnValid(mapping, log) == false)
{
return false;
}
String sampleCode = tryFigureSampleCode(mapping, log); String sampleCode = tryFigureSampleCode(mapping, log);
ExperimentIdentifier experimentIdentifier = tryFigureExperimentIdentifier(mapping);
if (sampleCode == null) if (sampleCode == null)
{ {
// sample can be skipped only if experiment identifier is supplied
if (experimentIdentifier == null)
{
log.datasetMappingError(mapping,
"neither sample nor experiment has been specified.");
return false;
} else
{
return experimentExists(mapping, log, experimentIdentifier);
}
} else
{
return sampleExistsAndBelongsToExperiment(mapping, log, sampleCode);
}
}
private boolean experimentExists(DataSetMappingInformation mapping, LogUtils log,
ExperimentIdentifier experimentIdentifier)
{
try
{
Experiment experiment = openbisService.tryToGetExperiment(experimentIdentifier);
if (experiment == null)
{
log.datasetMappingError(mapping, "experiment '%s' does not exist",
experimentIdentifier);
return false;
} else
{
return true;
}
} catch (UserFailureException ex)
{
// if project or group is unknown then an exception is thrown
log.datasetMappingError(mapping, "experiment '%s' does not exist: %s",
experimentIdentifier, ex.getMessage());
return false; return false;
} }
return isConversionColumnValid(mapping, log) }
&& existsAndBelongsToExperiment(mapping, log, sampleCode);
/**
 * Tries to build an experiment identifier from the group code, project code and experiment
 * column of the specified mapping. The value returned by
 * {@link DataSetMappingInformation#getExperimentName()} is used as the experiment code.
 * <p>
 * NOTE: we do not support experiment names if the dataset has to be connected to the experiment
 * directly.
 *
 * @param mapping the mapping entry to read the group, project and experiment from
 * @return the experiment identifier or <code>null</code> if the project code or the experiment
 *         code is missing in the mapping
 */
public static ExperimentIdentifier tryFigureExperimentIdentifier(
DataSetMappingInformation mapping)
{
String project = mapping.getProjectCode();
String experimentCode = mapping.getExperimentName();
// both parts are required, a partial specification is treated as "no experiment"
if (project != null && experimentCode != null)
{
return new ExperimentIdentifier(null, mapping.getGroupCode(), project, experimentCode);
} else
{
return null;
}
} }
private static boolean isConversionColumnValid(final DataSetMappingInformation mapping, private static boolean isConversionColumnValid(final DataSetMappingInformation mapping,
...@@ -235,7 +298,7 @@ class DatasetMappingResolver ...@@ -235,7 +298,7 @@ class DatasetMappingResolver
log.datasetMappingError(mapping, "conversion column must be empty " log.datasetMappingError(mapping, "conversion column must be empty "
+ "for this type of file."); + "for this type of file.");
return false; return false;
} }
return true; return true;
} }
...@@ -247,8 +310,8 @@ class DatasetMappingResolver ...@@ -247,8 +310,8 @@ class DatasetMappingResolver
return conversionRequired; return conversionRequired;
} }
private boolean existsAndBelongsToExperiment(DataSetMappingInformation mapping, LogUtils log, private boolean sampleExistsAndBelongsToExperiment(DataSetMappingInformation mapping,
String sampleCode) LogUtils log, String sampleCode)
{ {
if (isConnectedToExperiment(sampleCode, mapping, log) == false) if (isConnectedToExperiment(sampleCode, mapping, log) == false)
{ {
......
...@@ -36,7 +36,7 @@ import ch.systemsx.cisd.openbis.generic.shared.dto.NewProperty; ...@@ -36,7 +36,7 @@ import ch.systemsx.cisd.openbis.generic.shared.dto.NewProperty;
*/ */
public class DataSetInformationParserTest extends AbstractFileSystemTestCase public class DataSetInformationParserTest extends AbstractFileSystemTestCase
{ {
private static final String MANDATORY_HEADER = "file_name sample group\n"; private static final String MANDATORY_HEADER_SAMPLE = "file_name sample group\n";
private static final String HEADER = private static final String HEADER =
// "# user@gmail.com\n+"+ // "# user@gmail.com\n+"+
...@@ -54,7 +54,7 @@ public class DataSetInformationParserTest extends AbstractFileSystemTestCase ...@@ -54,7 +54,7 @@ public class DataSetInformationParserTest extends AbstractFileSystemTestCase
AssertJUnit.assertEquals(1, list.size()); AssertJUnit.assertEquals(1, list.size());
DataSetMappingInformation elem = list.get(0); DataSetMappingInformation elem = list.get(0);
AssertJUnit.assertEquals("group1", elem.getGroupCode()); AssertJUnit.assertEquals("group1", elem.getGroupCode());
AssertJUnit.assertEquals("parentCode", elem.getParentDataSetCode()); AssertJUnit.assertEquals("parentCode", elem.getParentDataSetCodes());
AssertJUnit.assertEquals("sample1", elem.getSampleCodeOrLabel()); AssertJUnit.assertEquals("sample1", elem.getSampleCodeOrLabel());
AssertJUnit.assertEquals("data.txt", elem.getFileName()); AssertJUnit.assertEquals("data.txt", elem.getFileName());
AssertJUnit.assertEquals("experiment1", elem.getExperimentName()); AssertJUnit.assertEquals("experiment1", elem.getExperimentName());
...@@ -68,9 +68,7 @@ public class DataSetInformationParserTest extends AbstractFileSystemTestCase ...@@ -68,9 +68,7 @@ public class DataSetInformationParserTest extends AbstractFileSystemTestCase
@Test @Test
public void testLoadIndexFileMandatoryColumnsOnly() public void testLoadIndexFileMandatoryColumnsOnly()
{ {
File indexFile = File indexFile = writeMappingFile(MANDATORY_HEADER_SAMPLE + "data2.txt sample2 group2");
writeMappingFile(MANDATORY_HEADER
+ "data2.txt sample2 group2");
List<DataSetMappingInformation> list = tryParse(indexFile); List<DataSetMappingInformation> list = tryParse(indexFile);
AssertJUnit.assertEquals(1, list.size()); AssertJUnit.assertEquals(1, list.size());
DataSetMappingInformation elem = list.get(0); DataSetMappingInformation elem = list.get(0);
...@@ -101,8 +99,8 @@ public class DataSetInformationParserTest extends AbstractFileSystemTestCase ...@@ -101,8 +99,8 @@ public class DataSetInformationParserTest extends AbstractFileSystemTestCase
AssertJUnit.assertNull("error during parsing expected", result); AssertJUnit.assertNull("error during parsing expected", result);
List<String> logLines = readLogFile(); List<String> logLines = readLogFile();
AssertJUnit.assertEquals(2, logLines.size()); AssertJUnit.assertEquals(2, logLines.size());
AssertionUtil.assertContains( AssertionUtil.assertContains("Mandatory column(s) 'group', 'file_name' are missing",
"Mandatory column(s) 'group', 'sample', 'file_name' are missing", logLines.get(1)); logLines.get(1));
} }
private List<DataSetMappingInformation> tryParse(File indexFile) private List<DataSetMappingInformation> tryParse(File indexFile)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment