From 46f016f541bbe9cb0620d1a1fb54415a55e708c0 Mon Sep 17 00:00:00 2001 From: fedoreno <fedoreno> Date: Tue, 11 Nov 2014 09:13:25 +0000 Subject: [PATCH] SSDM-1174 reworked ByExperiment archiving policy with unit tests SVN: 32772 --- datastore_server/etc/service.properties | 8 + .../standard/archiver/ByExpermientPolicy.java | 48 +-- .../archiver/ByExperimentPolicyTest.java | 349 ++++++++++++++++++ .../shared/ServiceProviderTestWrapper.java | 1 + 4 files changed, 375 insertions(+), 31 deletions(-) create mode 100644 datastore_server/sourceTest/java/ch/systemsx/cisd/openbis/dss/generic/server/plugins/standard/archiver/ByExperimentPolicyTest.java diff --git a/datastore_server/etc/service.properties b/datastore_server/etc/service.properties index 9b3085f4aa3..e43cd36afc7 100644 --- a/datastore_server/etc/service.properties +++ b/datastore_server/etc/service.properties @@ -425,6 +425,14 @@ auto-archiver.start = 23:00 # fully qualified class name of a policy that additionally filters data sets to be filtered #auto-archiver.policy.class = ch.systemsx.cisd.etlserver.plugins.DummyAutoArchiverPolicy +# use this policy to archive datasets in batches grouped by experiment +# auto-archiver.policy.class = ch.systemsx.cisd.openbis.dss.generic.server.plugins.standard.archiver.ByExpermientPolicy + +# the min-size in bytes, default is 0 +#auto-archiver.policy.minimal-archive-size = +# the max-size in bytes, default is 2^63-1. 
+#auto-archiver.policy.maximal-archive-size = + # Maintenance task (performed only once) to create paths of existing data sets in pathinfo database path-info-feeding.class = ch.systemsx.cisd.etlserver.path.PathInfoDatabaseFeedingTask path-info-feeding.execute-only-once = true diff --git a/datastore_server/source/java/ch/systemsx/cisd/openbis/dss/generic/server/plugins/standard/archiver/ByExpermientPolicy.java b/datastore_server/source/java/ch/systemsx/cisd/openbis/dss/generic/server/plugins/standard/archiver/ByExpermientPolicy.java index 4f9a019cace..b5c044e8702 100644 --- a/datastore_server/source/java/ch/systemsx/cisd/openbis/dss/generic/server/plugins/standard/archiver/ByExpermientPolicy.java +++ b/datastore_server/source/java/ch/systemsx/cisd/openbis/dss/generic/server/plugins/standard/archiver/ByExpermientPolicy.java @@ -31,9 +31,9 @@ public class ByExpermientPolicy implements IAutoArchiverPolicy private static final Logger operationLog = LogFactory.getLogger(LogCategory.OPERATION, ByExpermientPolicy.class); - private static final String MINIMAL_ARCHIVE_SIZE = "minimal-archive-size"; + public static final String MINIMAL_ARCHIVE_SIZE = "minimal-archive-size"; - private static final String MAXIMAL_ARCHIVE_SIZE = "maximal-archive-size"; + public static final String MAXIMAL_ARCHIVE_SIZE = "maximal-archive-size"; private static final long DEFAULT_MINIMAL_ARCHIVE_SIZE = 0; @@ -70,16 +70,21 @@ public class ByExpermientPolicy implements IAutoArchiverPolicy long size = node.getCumulatedSize(); if (size >= minArchiveSize) { - TreeNode kid = node; - if (size <= maxArchiveSize) + if (node instanceof DatasetListWithTotal) { - return reportFind(kid.collectSubTree()); - } - - if (kid instanceof Grouper) + DatasetListWithTotal goodDatasetList = (DatasetListWithTotal) node; + if (size <= maxArchiveSize) + { + return goodDatasetList; + } else + { + sortBySamples(goodDatasetList); + return splitDatasets(goodDatasetList); + } + } else { @SuppressWarnings("unchecked") - 
Grouper<TreeNode, TreeNode> subtree = (Grouper<TreeNode, TreeNode>) kid; + Grouper<TreeNode, TreeNode> subtree = (Grouper<TreeNode, TreeNode>) node; List<AbstractExternalData> found = walkAndFind(subtree); if (found.size() > 0) { @@ -87,32 +92,13 @@ public class ByExpermientPolicy implements IAutoArchiverPolicy } // no individual subtree is bigger than min-size but this branch is so let's have a subset - return splitDataset(kid.collectSubTree()); - - } else - { - DatasetListWithTotal bigDataset = (DatasetListWithTotal) kid; - sortBySamples(bigDataset); - return splitDataset(bigDataset); + return splitDatasets(node.collectSubTree()); } } } return new ArrayList<AbstractExternalData>(); } - private List<AbstractExternalData> reportFind(List<AbstractExternalData> list) - { - long total = 0; - for (AbstractExternalData ds : list) - { - total += ds.getSize(); - operationLog.info("added ds " + ds.getCode() + " for exp " + ds.getExperiment().getCode() + " with size " + ds.getSize() + " and total " - + total); - } - - return list; - } - private void sortBySamples(DatasetListWithTotal datasets) { Collections.sort(datasets, new Comparator<AbstractExternalData>() @@ -130,7 +116,7 @@ public class ByExpermientPolicy implements IAutoArchiverPolicy }); } - private List<AbstractExternalData> splitDataset(List<AbstractExternalData> datasets) + private List<AbstractExternalData> splitDatasets(List<AbstractExternalData> datasets) { DatasetListWithTotal result = new DatasetListWithTotal(); @@ -144,7 +130,7 @@ public class ByExpermientPolicy implements IAutoArchiverPolicy result.add(ds); } - return reportFind(result); + return result; } private ProjectGroup groupDatasets(List<AbstractExternalData> dataSets) diff --git a/datastore_server/sourceTest/java/ch/systemsx/cisd/openbis/dss/generic/server/plugins/standard/archiver/ByExperimentPolicyTest.java b/datastore_server/sourceTest/java/ch/systemsx/cisd/openbis/dss/generic/server/plugins/standard/archiver/ByExperimentPolicyTest.java new 
file mode 100644
index 00000000000..5d4cfe73e66
--- /dev/null
+++ b/datastore_server/sourceTest/java/ch/systemsx/cisd/openbis/dss/generic/server/plugins/standard/archiver/ByExperimentPolicyTest.java
@@ -0,0 +1,376 @@
+package ch.systemsx.cisd.openbis.dss.generic.server.plugins.standard.archiver;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.jmock.Expectations;
+import org.jmock.Mockery;
+import org.springframework.beans.factory.BeanFactory;
+import org.testng.AssertJUnit;
+import org.testng.ITestResult;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+import ch.systemsx.cisd.common.properties.ExtendedProperties;
+import ch.systemsx.cisd.openbis.dss.generic.shared.IDataSetPathInfoProvider;
+import ch.systemsx.cisd.openbis.dss.generic.shared.ISingleDataSetPathInfoProvider;
+import ch.systemsx.cisd.openbis.dss.generic.shared.ServiceProviderTestWrapper;
+import ch.systemsx.cisd.openbis.dss.generic.shared.dto.DataSetPathInfo;
+import ch.systemsx.cisd.openbis.generic.shared.basic.dto.AbstractExternalData;
+import ch.systemsx.cisd.openbis.generic.shared.basic.dto.DataSetType;
+import ch.systemsx.cisd.openbis.generic.shared.basic.dto.Experiment;
+import ch.systemsx.cisd.openbis.generic.shared.basic.dto.PhysicalDataSet;
+import ch.systemsx.cisd.openbis.generic.shared.basic.dto.Project;
+import ch.systemsx.cisd.openbis.generic.shared.basic.dto.Sample;
+
+/**
+ * Unit tests for {@link ByExpermientPolicy}: checks how many data sets the policy selects
+ * for different minimal and maximal archive sizes.
+ */
+public class ByExperimentPolicyTest extends AssertJUnit
+{
+    private Mockery context;
+
+    /**
+     * Installs a mocked application context whose path-info provider reports a size of
+     * 42 bytes for the data set "dsNoSize".
+     */
+    @BeforeMethod
+    public void setUpTestEnvironment()
+    {
+        context = new Mockery();
+        final BeanFactory beanFactory = context.mock(BeanFactory.class);
+        ServiceProviderTestWrapper.setApplicationContext(beanFactory);
+        final IDataSetPathInfoProvider pathProviderMock = ServiceProviderTestWrapper.mock(context, IDataSetPathInfoProvider.class);
+        final ISingleDataSetPathInfoProvider singleDsProviderMock = ServiceProviderTestWrapper.mock(context, ISingleDataSetPathInfoProvider.class);
+
+        ServiceProviderTestWrapper.addMock(context, IDataSetPathInfoProvider.class,
+                pathProviderMock);
+
+        context.checking(new Expectations()
+            {
+                {
+                    allowing(pathProviderMock).tryGetSingleDataSetPathInfoProvider("dsNoSize");
+                    will(returnValue(singleDsProviderMock));
+
+                    allowing(singleDsProviderMock).getRootPathInfo();
+                    will(returnValue(new DataSetPathInfo()
+                        {
+                            {
+                                setSizeInBytes(42);
+                            }
+                        }));
+                }
+            });
+
+    }
+
+    /** Verifies the mock expectations and restores the application context. */
+    @AfterMethod
+    public void checkMockExpectations(ITestResult result)
+    {
+        context.assertIsSatisfied();
+        ServiceProviderTestWrapper.restoreApplicationContext();
+    }
+
+    /** An empty input list yields an empty result with default settings. */
+    @Test
+    public void testNothingFoundForNoInputAndNoConfig()
+    {
+        ExtendedProperties properties = new ExtendedProperties();
+        ByExpermientPolicy sut = new ByExpermientPolicy(properties);
+
+        List<AbstractExternalData> filtered = sut.filter(new ArrayList<AbstractExternalData>());
+
+        assertEquals(0, filtered.size());
+
+        context.assertIsSatisfied();
+    }
+
+    // Fixtures cached by code so that data sets created with equal codes share one instance.
+    private static final Map<String, Project> projects = new HashMap<String, Project>();
+
+    private static final Map<String, Experiment> experiments = new HashMap<String, Experiment>();
+
+    private static final Map<String, DataSetType> datasets = new HashMap<String, DataSetType>();
+
+    /**
+     * Creates a physical data set with the given code and size that belongs to the (cached)
+     * experiment, project and data set type with the given codes.
+     */
+    private static AbstractExternalData createDataset(String projectCode, String experimentCode, String datasetType, String dsCode, Long size)
+    {
+        PhysicalDataSet ds = new PhysicalDataSet();
+        ds.setCode(dsCode);
+
+        Experiment exp = experiments.get(experimentCode);
+        if (exp == null)
+        {
+            exp = new Experiment();
+            experiments.put(experimentCode, exp);
+        }
+
+        Project project = projects.get(projectCode);
+        if (project == null)
+        {
+            project = new Project();
+            project.setCode(projectCode);
+            projects.put(projectCode, project);
+        }
+        // Link on every call: the experiment may be new although the project is cached.
+        exp.setProject(project);
+
+        exp.setCode(experimentCode);
+        ds.setExperiment(exp);
+        ds.setSize(size);
+
+        DataSetType dataSetType = datasets.get(datasetType);
+        if (dataSetType == null)
+        {
+            dataSetType = new DataSetType();
+            dataSetType.setCode(datasetType);
+
+            datasets.put(datasetType, dataSetType);
+        }
+        ds.setDataSetType(dataSetType);
+
+        return ds;
+    }
+
+    /** A data set without a size is expected to come back with the size from path info (42). */
+    @Test
+    public void testDatasetSizeIsPatched()
+    {
+        ExtendedProperties properties = new ExtendedProperties();
+        ByExpermientPolicy sut = new ByExpermientPolicy(properties);
+
+        List<AbstractExternalData> dataSets = new ArrayList<AbstractExternalData>();
+        dataSets.add(createDataset("p1", "234", "t1", "dsNoSize", null));
+
+        List<AbstractExternalData> filtered = sut.filter(dataSets);
+
+        assertEquals(1, filtered.size());
+        assertEquals(42, filtered.get(0).getSize().longValue());
+
+        context.assertIsSatisfied();
+    }
+
+    /** Each data set alone is below min-size (14); all three are expected in the result. */
+    @Test
+    public void testEverythingFromProjectIsReturnedIfDatasetsAreSmall()
+    {
+        ExtendedProperties props = new ExtendedProperties();
+        props.setProperty(ByExpermientPolicy.MINIMAL_ARCHIVE_SIZE, "14");
+        props.setProperty(ByExpermientPolicy.MAXIMAL_ARCHIVE_SIZE, "100");
+
+        ByExpermientPolicy sut = new ByExpermientPolicy(props);
+
+        List<AbstractExternalData> dataSets = new ArrayList<AbstractExternalData>();
+        dataSets.add(createDataset("p1", "e1", "dt1", "ds1", 2L));
+        dataSets.add(createDataset("p1", "e2", "dt2", "ds2", 8L));
+        dataSets.add(createDataset("p1", "e3", "dt3", "ds3", 13L));
+
+        List<AbstractExternalData> filtered = sut.filter(dataSets);
+
+        assertEquals(3, filtered.size());
+
+        context.assertIsSatisfied();
+    }
+
+    /** Three data sets totalling 24 bytes with max-size 10: only one is expected. */
+    @Test
+    public void testSubsetIsReturnedIfDatasetsAreTooBig()
+    {
+        ExtendedProperties props = new ExtendedProperties();
+        props.setProperty(ByExpermientPolicy.MINIMAL_ARCHIVE_SIZE, "6");
+        props.setProperty(ByExpermientPolicy.MAXIMAL_ARCHIVE_SIZE, "10");
+
+        ByExpermientPolicy sut = new ByExpermientPolicy(props);
+
+        List<AbstractExternalData> dataSets = new ArrayList<AbstractExternalData>();
+        dataSets.add(createDataset("p1", "e1", "dt1", "ds1", 7L));
+        dataSets.add(createDataset("p1", "e1", "dt1", "ds2", 8L));
+        dataSets.add(createDataset("p1", "e1", "dt1", "ds3", 9L));
+
+        List<AbstractExternalData> filtered = sut.filter(dataSets);
+
+        assertEquals(1, filtered.size());
+
+        context.assertIsSatisfied();
+    }
+
+    /** All data sets together (33 bytes) stay below min-size (500): nothing is expected. */
+    @Test
+    public void testTooSmallSetsAreNotArchived()
+    {
+        ExtendedProperties props = new ExtendedProperties();
+        props.setProperty(ByExpermientPolicy.MINIMAL_ARCHIVE_SIZE, "500");
+        props.setProperty(ByExpermientPolicy.MAXIMAL_ARCHIVE_SIZE, "1000");
+
+        ByExpermientPolicy sut = new ByExpermientPolicy(props);
+
+        List<AbstractExternalData> dataSets = new ArrayList<AbstractExternalData>();
+        dataSets.add(createDataset("p1", "e1", "dt1", "ds1", 7L));
+        dataSets.add(createDataset("p1", "e1", "dt1", "ds2", 8L));
+        dataSets.add(createDataset("p1", "e1", "dt1", "ds3", 9L));
+        dataSets.add(createDataset("p1", "e2", "dt1", "ds4", 9L));
+
+        List<AbstractExternalData> filtered = sut.filter(dataSets);
+
+        assertEquals(0, filtered.size());
+
+        context.assertIsSatisfied();
+    }
+
+    /** Two of four data sets are expected; only ds1 and ds2 share experiment and type. */
+    @Test
+    public void testSameDatatypeIsGroupedSmalls()
+    {
+        ExtendedProperties props = new ExtendedProperties();
+        props.setProperty(ByExpermientPolicy.MINIMAL_ARCHIVE_SIZE, "10");
+        props.setProperty(ByExpermientPolicy.MAXIMAL_ARCHIVE_SIZE, "1000");
+
+        ByExpermientPolicy sut = new ByExpermientPolicy(props);
+
+        List<AbstractExternalData> dataSets = new ArrayList<AbstractExternalData>();
+        dataSets.add(createDataset("p1", "e1", "dt1", "ds1", 7L));
+        dataSets.add(createDataset("p1", "e1", "dt1", "ds2", 8L));
+        dataSets.add(createDataset("p1", "e1", "dt2", "ds3", 9L));
+        dataSets.add(createDataset("p1", "e2", "dt1", "ds4", 9L));
+
+        List<AbstractExternalData> filtered = sut.filter(dataSets);
+
+        assertEquals(2, filtered.size());
+
+        context.assertIsSatisfied();
+    }
+
+    /** Same layout with every size above min-size (6): again two data sets are expected. */
+    @Test
+    public void testSameDatatypeIsGroupedBigs()
+    {
+        ExtendedProperties props = new ExtendedProperties();
+        props.setProperty(ByExpermientPolicy.MINIMAL_ARCHIVE_SIZE, "6");
+        props.setProperty(ByExpermientPolicy.MAXIMAL_ARCHIVE_SIZE, "1000");
+
+        ByExpermientPolicy sut = new ByExpermientPolicy(props);
+
+        List<AbstractExternalData> dataSets = new ArrayList<AbstractExternalData>();
+
+        dataSets.add(createDataset("p1", "e1", "dt1", "ds1", 17L));
+        dataSets.add(createDataset("p1", "e1", "dt1", "ds2", 18L));
+        dataSets.add(createDataset("p1", "e1", "dt2", "ds3", 19L));
+        dataSets.add(createDataset("p1", "e2", "dt1", "ds4", 19L));
+
+        List<AbstractExternalData> filtered = sut.filter(dataSets);
+
+        assertEquals(2, filtered.size());
+
+        context.assertIsSatisfied();
+    }
+
+    /** Every data set is below min-size (10); three data sets are expected. */
+    @Test
+    public void testSameExperimentIsGroupedSmalls()
+    {
+        ExtendedProperties props = new ExtendedProperties();
+        props.setProperty(ByExpermientPolicy.MINIMAL_ARCHIVE_SIZE, "10");
+        props.setProperty(ByExpermientPolicy.MAXIMAL_ARCHIVE_SIZE, "1000");
+
+        ByExpermientPolicy sut = new ByExpermientPolicy(props);
+
+        List<AbstractExternalData> dataSets = new ArrayList<AbstractExternalData>();
+        dataSets.add(createDataset("p1", "e1", "dt1", "ds1", 7L));
+        dataSets.add(createDataset("p1", "e1", "dt2", "ds2", 8L));
+        dataSets.add(createDataset("p1", "e1", "dt3", "ds3", 9L));
+        dataSets.add(createDataset("p1", "e2", "dt4", "ds4", 9L));
+        dataSets.add(createDataset("p2", "e3", "dt1", "ds5", 9L));
+
+        List<AbstractExternalData> filtered = sut.filter(dataSets);
+
+        assertEquals(3, filtered.size());
+
+        context.assertIsSatisfied();
+    }
+
+    /** Every data set is above min-size (10); a single data set is expected. */
+    @Test
+    public void testSameExperimentIsGroupedBigs()
+    {
+        ExtendedProperties props = new ExtendedProperties();
+        props.setProperty(ByExpermientPolicy.MINIMAL_ARCHIVE_SIZE, "10");
+        props.setProperty(ByExpermientPolicy.MAXIMAL_ARCHIVE_SIZE, "1000");
+
+        ByExpermientPolicy sut = new ByExpermientPolicy(props);
+
+        List<AbstractExternalData> dataSets = new ArrayList<AbstractExternalData>();
+
+        dataSets.add(createDataset("p1", "e1", "dt1", "ds1", 17L));
+        dataSets.add(createDataset("p1", "e1", "dt2", "ds2", 18L));
+        dataSets.add(createDataset("p1", "e1", "dt3", "ds3", 19L));
+        dataSets.add(createDataset("p1", "e2", "dt4", "ds4", 19L));
+        dataSets.add(createDataset("p2", "e3", "dt1", "ds5", 19L));
+
+        List<AbstractExternalData> filtered = sut.filter(dataSets);
+
+        assertEquals(1, filtered.size());
+
+        context.assertIsSatisfied();
+    }
+
+    /**
+     * Five 10-byte data sets of one type with max-size 45: four are expected, and the data
+     * set attached to sample s5 must not be among them.
+     */
+    @Test
+    public void testDatasetTypeIsSplitIfTooMany()
+    {
+        ExtendedProperties props = new ExtendedProperties();
+        props.setProperty(ByExpermientPolicy.MINIMAL_ARCHIVE_SIZE, "20");
+        props.setProperty(ByExpermientPolicy.MAXIMAL_ARCHIVE_SIZE, "45");
+
+        ByExpermientPolicy sut = new ByExpermientPolicy(props);
+
+        List<AbstractExternalData> dataSets = new ArrayList<AbstractExternalData>();
+
+        Sample s1 = new Sample();
+        s1.setIdentifier("s1");
+
+        Sample s2 = new Sample();
+        s2.setIdentifier("s2");
+
+        Sample s3 = new Sample();
+        s3.setIdentifier("s3");
+
+        Sample s4 = new Sample();
+        s4.setIdentifier("s4");
+
+        Sample s5 = new Sample();
+        s5.setIdentifier("s5"); // this one will be sorted out
+
+        AbstractExternalData ds;
+        dataSets.add(ds = createDataset("p1", "e1", "dt1", "ds1", 10L));
+        ds.setSample(s5);
+        dataSets.add(ds = createDataset("p1", "e1", "dt1", "ds2", 10L));
+        ds.setSample(s4);
+        dataSets.add(ds = createDataset("p1", "e1", "dt1", "ds3", 10L));
+        ds.setSample(s3);
+        dataSets.add(ds = createDataset("p1", "e1", "dt1", "ds4", 10L));
+        ds.setSample(s2);
+        dataSets.add(ds = createDataset("p1", "e1", "dt1", "ds5", 10L));
+        ds.setSample(s1);
+
+        List<AbstractExternalData> filtered = sut.filter(dataSets);
+
+        assertEquals(4, filtered.size());
+        for (AbstractExternalData dsTest : filtered)
+        {
+            assertFalse("s5".equals(dsTest.getSampleIdentifier()));
+        }
+
+        context.assertIsSatisfied();
+    }
+}
diff --git a/datastore_server/sourceTest/java/ch/systemsx/cisd/openbis/dss/generic/shared/ServiceProviderTestWrapper.java
b/datastore_server/sourceTest/java/ch/systemsx/cisd/openbis/dss/generic/shared/ServiceProviderTestWrapper.java index b3398353a65..11c3a7a6a7e 100644 --- a/datastore_server/sourceTest/java/ch/systemsx/cisd/openbis/dss/generic/shared/ServiceProviderTestWrapper.java +++ b/datastore_server/sourceTest/java/ch/systemsx/cisd/openbis/dss/generic/shared/ServiceProviderTestWrapper.java @@ -47,6 +47,7 @@ public class ServiceProviderTestWrapper classNameToBeanName.put(IDataStoreServiceInternal.class, "data-store-service"); classNameToBeanName .put(IHierarchicalContentProvider.class, "hierarchical-content-provider"); + classNameToBeanName.put(IDataSetPathInfoProvider.class, "data-set-path-infos-provider"); } /** -- GitLab