diff --git a/datastore_server/source/java/ch/systemsx/cisd/etlserver/plugins/BaseGroupingPolicy.java b/datastore_server/source/java/ch/systemsx/cisd/etlserver/plugins/BaseGroupingPolicy.java index addfacc387fcbf947b30b84c026ff112bc97a361..7728c9c775e73ca5e73797893d135304352f6496 100644 --- a/datastore_server/source/java/ch/systemsx/cisd/etlserver/plugins/BaseGroupingPolicy.java +++ b/datastore_server/source/java/ch/systemsx/cisd/etlserver/plugins/BaseGroupingPolicy.java @@ -4,12 +4,8 @@ import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Properties; -import org.apache.log4j.Logger; - -import ch.systemsx.cisd.common.logging.LogCategory; -import ch.systemsx.cisd.common.logging.LogFactory; -import ch.systemsx.cisd.common.properties.ExtendedProperties; import ch.systemsx.cisd.common.properties.PropertyUtils; import ch.systemsx.cisd.etlserver.IAutoArchiverPolicy; import ch.systemsx.cisd.etlserver.plugins.grouping.DatasetListWithTotal; @@ -26,9 +22,6 @@ import ch.systemsx.cisd.openbis.generic.shared.basic.dto.AbstractExternalData; */ public abstract class BaseGroupingPolicy implements IAutoArchiverPolicy { - private static final Logger operationLog = - LogFactory.getLogger(LogCategory.OPERATION, BaseGroupingPolicy.class); - public static final String MINIMAL_ARCHIVE_SIZE = "minimal-archive-size"; public static final String MAXIMAL_ARCHIVE_SIZE = "maximal-archive-size"; @@ -43,7 +36,7 @@ public abstract class BaseGroupingPolicy implements IAutoArchiverPolicy private IDataSetPathInfoProvider pathInfoProvider; - public BaseGroupingPolicy(ExtendedProperties properties) + public BaseGroupingPolicy(Properties properties) { minArchiveSize = PropertyUtils.getLong(properties, MINIMAL_ARCHIVE_SIZE, DEFAULT_MINIMAL_ARCHIVE_SIZE); @@ -55,6 +48,10 @@ public abstract class BaseGroupingPolicy implements IAutoArchiverPolicy @Override public final List<AbstractExternalData> filter(List<AbstractExternalData> dataSets) { + if (dataSets.isEmpty()) + { + return dataSets; + } makeSureAllDataSetsWithSize(dataSets); return filterDataSetsWithSizes(dataSets); } diff --git a/datastore_server/source/java/ch/systemsx/cisd/etlserver/plugins/ByExperimentPolicy.java b/datastore_server/source/java/ch/systemsx/cisd/etlserver/plugins/ByExperimentPolicy.java index d7e264c859061f19642033236fa351d4ea418f73..53102b27f90902b2f17f1a889b9d3bfc82decfe4 100644 --- a/datastore_server/source/java/ch/systemsx/cisd/etlserver/plugins/ByExperimentPolicy.java +++ b/datastore_server/source/java/ch/systemsx/cisd/etlserver/plugins/ByExperimentPolicy.java @@ -4,13 +4,9 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.List; - -import org.apache.log4j.Logger; +import java.util.Properties; import ch.systemsx.cisd.common.collection.SimpleComparator; -import ch.systemsx.cisd.common.logging.LogCategory; -import ch.systemsx.cisd.common.logging.LogFactory; -import ch.systemsx.cisd.common.properties.ExtendedProperties; import ch.systemsx.cisd.etlserver.IAutoArchiverPolicy; import ch.systemsx.cisd.etlserver.plugins.grouping.DatasetListWithTotal; import ch.systemsx.cisd.etlserver.plugins.grouping.Grouping; @@ -25,12 +21,9 @@ import ch.systemsx.cisd.openbis.generic.shared.basic.dto.AbstractExternalData; */ public class ByExperimentPolicy extends BaseGroupingPolicy implements IAutoArchiverPolicy { - private static final Logger operationLog = - LogFactory.getLogger(LogCategory.OPERATION, ByExperimentPolicy.class); - private final 
List<IGroupKeyProvider> providers; - public ByExperimentPolicy(ExtendedProperties properties) + public ByExperimentPolicy(Properties properties) { super(properties); providers = new ArrayList<IGroupKeyProvider>(); diff --git a/datastore_server/source/java/ch/systemsx/cisd/etlserver/plugins/BySpacePolicy.java b/datastore_server/source/java/ch/systemsx/cisd/etlserver/plugins/BySpacePolicy.java index 2aa7e11a54c9ca9b58a6b197c27f571683f647c5..8e922393401cd2766ad82ad836c517b8531f38a1 100644 --- a/datastore_server/source/java/ch/systemsx/cisd/etlserver/plugins/BySpacePolicy.java +++ b/datastore_server/source/java/ch/systemsx/cisd/etlserver/plugins/BySpacePolicy.java @@ -5,12 +5,8 @@ import java.util.Collection; import java.util.Collections; import java.util.LinkedList; import java.util.List; +import java.util.Properties; -import org.apache.log4j.Logger; - -import ch.systemsx.cisd.common.logging.LogCategory; -import ch.systemsx.cisd.common.logging.LogFactory; -import ch.systemsx.cisd.common.properties.ExtendedProperties; import ch.systemsx.cisd.etlserver.IAutoArchiverPolicy; import ch.systemsx.cisd.etlserver.plugins.grouping.DatasetListWithTotal; import ch.systemsx.cisd.etlserver.plugins.grouping.Grouping; @@ -24,12 +20,9 @@ import ch.systemsx.cisd.openbis.generic.shared.basic.dto.AbstractExternalData; */ public class BySpacePolicy extends BaseGroupingPolicy implements IAutoArchiverPolicy { - private static final Logger operationLog = - LogFactory.getLogger(LogCategory.OPERATION, BySpacePolicy.class); - private final List<IGroupKeyProvider> providers; - public BySpacePolicy(ExtendedProperties properties) + public BySpacePolicy(Properties properties) { super(properties); providers = new ArrayList<IGroupKeyProvider>(); diff --git a/datastore_server/source/java/ch/systemsx/cisd/etlserver/plugins/GroupingPolicy.java b/datastore_server/source/java/ch/systemsx/cisd/etlserver/plugins/GroupingPolicy.java new file mode 100644 index 0000000000000000000000000000000000000000..97545b74c0f4a9c0c3ab2372eb42572828997a4c --- /dev/null +++ b/datastore_server/source/java/ch/systemsx/cisd/etlserver/plugins/GroupingPolicy.java @@ -0,0 +1,295 @@ +/* + * Copyright 2015 ETH Zuerich, SIS + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package ch.systemsx.cisd.etlserver.plugins;
+
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Date;
+import java.util.List;
+import java.util.Properties;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.log4j.Logger;
+
+import ch.systemsx.cisd.common.collection.CollectionUtils;
+import ch.systemsx.cisd.common.exceptions.ConfigurationFailureException;
+import ch.systemsx.cisd.common.logging.LogCategory;
+import ch.systemsx.cisd.common.logging.LogFactory;
+import ch.systemsx.cisd.common.properties.PropertyUtils;
+import ch.systemsx.cisd.etlserver.plugins.grouping.DatasetListWithTotal;
+import ch.systemsx.cisd.etlserver.plugins.grouping.Grouping;
+import ch.systemsx.cisd.etlserver.plugins.grouping.IGroupKeyProvider;
+import ch.systemsx.cisd.openbis.generic.shared.basic.BasicConstant;
+import ch.systemsx.cisd.openbis.generic.shared.basic.dto.AbstractExternalData;
+import ch.systemsx.cisd.openbis.generic.shared.basic.dto.Code;
+
+/**
+ * Configurable auto-archiving policy which tries to find a group of data sets whose total size
+ * lies within a specified interval. Grouping can be defined by space, project, experiment, sample,
+ * data set type or a combination of those. The combination defines a so-called 'grouping key'.
+ * Groups that are too small can be merged. Several grouping keys can be specified.
+ * <p>
+ * Searching for an appropriate group of data sets for auto archiving is logged. If no group could be found,
+ * the admin (as specified in log.xml) is notified by email with the search log as content.
+ *
+ * @author Franz-Josef Elmer
+ */
+public class GroupingPolicy extends BaseGroupingPolicy
+{
+    static final String GROUPING_KEYS_KEY = "grouping-keys";
+    private static final Logger operationLog =
+            LogFactory.getLogger(LogCategory.OPERATION, GroupingPolicy.class);
+    private static final Logger notificationLog =
+            LogFactory.getLogger(LogCategory.NOTIFY, GroupingPolicy.class);
+
+    private final List<CombinedGroupKeys> groupKeyProviders = new ArrayList<CombinedGroupKeys>();
+
+    public GroupingPolicy(Properties properties)
+    {
+        super(properties);
+        List<String> groupingKeys = PropertyUtils.getList(properties, GROUPING_KEYS_KEY);
+        for (String groupingKey : groupingKeys)
+        {
+            String[] splitted = groupingKey.split(":", 2);
+            boolean merge = false;
+            if (splitted.length > 1)
+            {
+                if ("merge".equals(splitted[1]) == false)
+                {
+                    throw new ConfigurationFailureException("Invalid grouping key in property '" + GROUPING_KEYS_KEY
+                            + "' because 'merge' is expected after ':': " + groupingKey);
+                }
+                merge = splitted.length < 2 ?
false : "merge".equals(splitted[1]); + } + String[] keyItems = splitted[0].split("#"); + List<IGroupKeyProvider> groupings = new ArrayList<IGroupKeyProvider>(); + for (String keyItem : keyItems) + { + try + { + groupings.add(Grouping.valueOf(keyItem)); + } catch (IllegalArgumentException ex) + { + throw new ConfigurationFailureException("Invalid basic grouping key in property '" + + GROUPING_KEYS_KEY + "': " + keyItem + " (valid values are " + + Arrays.asList(Grouping.values()) + ")"); + } + } + groupKeyProviders.add(new CombinedGroupKeys(groupings, merge)); + } + } + + @Override + protected List<AbstractExternalData> filterDataSetsWithSizes(List<AbstractExternalData> dataSets) + { + List<String> log = new ArrayList<String>(); + for (CombinedGroupKeys combinedGroupKeys : groupKeyProviders) + { + List<DatasetListWithTotal> groups = splitIntoGroups(dataSets, combinedGroupKeys); + log(log, combinedGroupKeys + " has grouped " + dataSets.size() + " data sets into " + + groups.size() + " groups."); + if (groups.isEmpty() == false) + { + List<AbstractExternalData> result + = tryFindGroupOrMerge(groups, combinedGroupKeys.isMerge(), log); + if (result != null) + { + log(log, "filtered data sets: " + CollectionUtils.abbreviate(Code.extractCodes(result), 20)); + return result; + } + } + } + StringBuilder builder = new StringBuilder(); + builder.append("From " + dataSets.size() + " data sets no group could be found to be fit between "); + builder.append(FileUtils.byteCountToDisplaySize(minArchiveSize)).append(" and "); + builder.append(FileUtils.byteCountToDisplaySize(maxArchiveSize)); + builder.append("\n\nLog:"); + for (String logMessage : log) + { + builder.append('\n').append(logMessage); + } + notificationLog.warn(builder.toString()); + return new ArrayList<AbstractExternalData>(); + } + + private List<AbstractExternalData> tryFindGroupOrMerge(List<DatasetListWithTotal> groups, + boolean merge, List<String> log) + { + List<DatasetListWithTotal> tooSmallGroups = new ArrayList<DatasetListWithTotal>(); + List<DatasetListWithTotal> fittingGroups = new ArrayList<DatasetListWithTotal>(); + for (DatasetListWithTotal group : groups) + { + long size = group.getCumulatedSize(); + if (size < minArchiveSize) + { + tooSmallGroups.add(group); + } else if (size <= maxArchiveSize) + { + fittingGroups.add(group); + } + } + log(log, fittingGroups.size() + " groups match in size, " + tooSmallGroups.size() + " groups are too small and " + + (groups.size() - fittingGroups.size() - tooSmallGroups.size()) + " groups are too large."); + if (fittingGroups.isEmpty() == false) + { + return getOldestGroup(fittingGroups, log); + } + if (tooSmallGroups.size() < 2 || merge == false) + { + return null; + } + return tryMerge(tooSmallGroups, log); + } + + private List<AbstractExternalData> getOldestGroup(List<DatasetListWithTotal> groups, List<String> log) + { + if (groups.size() == 1) + { + return groups.get(0).getList(); + } + GroupWithAge oldestGroup = sortGroupsByAge(groups).get(0); + String timestamp = new SimpleDateFormat(BasicConstant.DATE_WITHOUT_TIMEZONE_PATTERN).format(new Date(oldestGroup.age)); + log(log, "All data sets have been accessed at " + timestamp + " or before."); + return oldestGroup.group.getList(); + } + + private List<GroupWithAge> sortGroupsByAge(List<DatasetListWithTotal> groups) + { + List<GroupWithAge> groupsWithAge = new ArrayList<GroupWithAge>(); + for (DatasetListWithTotal group : groups) + { + groupsWithAge.add(new GroupWithAge(group)); + } + Collections.sort(groupsWithAge); + return 
groupsWithAge; + } + + private List<AbstractExternalData> tryMerge(List<DatasetListWithTotal> groups, List<String> log) + { + List<GroupWithAge> groupsWithAge = sortGroupsByAge(groups); + List<AbstractExternalData> result = new ArrayList<AbstractExternalData>(); + long total = 0; + for (int i = 0; i < groupsWithAge.size(); i++) + { + DatasetListWithTotal group = groupsWithAge.get(i).group; + result.addAll(group.getList()); + total += group.getCumulatedSize(); + if (total >= minArchiveSize) + { + if (total <= maxArchiveSize) + { + log(log, (i+1) + " groups have been merged."); + return result; + } + log(log, (i+1) + " groups have been merged, but the total size of " + FileUtils.byteCountToDisplaySize(total) + + " is above the required maximum of " + FileUtils.byteCountToDisplaySize(maxArchiveSize)); + return null; + } + } + log(log, "Merging all " + groups.size() + " groups gives a total size of " + FileUtils.byteCountToDisplaySize(total) + + " which is still below required minimum of " + FileUtils.byteCountToDisplaySize(minArchiveSize)); + return null; + } + + private List<DatasetListWithTotal> splitIntoGroups(List<AbstractExternalData> dataSets, IGroupKeyProvider groupKeyProvider) + { + List<DatasetListWithTotal> groups = new ArrayList<DatasetListWithTotal>( + splitDataSetsInGroupsAccordingToCriteria(dataSets, groupKeyProvider)); + Collections.sort(groups); + return groups; + } + + private void log(List<String> log, Object logMessage) + { + log.add(logMessage.toString()); + operationLog.info(logMessage.toString()); + } + + private static final class CombinedGroupKeys implements IGroupKeyProvider + { + private final List<IGroupKeyProvider> groupKeyProviders; + private final boolean merge; + + CombinedGroupKeys(List<IGroupKeyProvider> groupKeyProviders, boolean merge) + { + this.groupKeyProviders = groupKeyProviders; + this.merge = merge; + } + + public boolean isMerge() + { + return merge; + } + + @Override + public String getGroupKey(AbstractExternalData dataset) + { + StringBuilder builder = new StringBuilder(); + for (IGroupKeyProvider groupKeyProvider : groupKeyProviders) + { + if (builder.length() > 0) + { + builder.append('#'); + } + builder.append(groupKeyProvider.getGroupKey(dataset)); + } + return builder.toString(); + } + + @Override + public String toString() + { + StringBuilder builder = new StringBuilder(); + for (IGroupKeyProvider keyProvider : groupKeyProviders) + { + if (builder.length() > 0) + { + builder.append('#'); + } + builder.append(keyProvider); + } + if (merge) + { + builder.append(":merge"); + } + return "Grouping key: '" + builder.toString() + "'"; + } + + } + + private static class GroupWithAge implements Comparable<GroupWithAge> + { + private long age; + private DatasetListWithTotal group; + GroupWithAge(DatasetListWithTotal group) + { + this.group = group; + List<AbstractExternalData> dataSets = group.getList(); + for (AbstractExternalData dataSet : dataSets) + { + age = Math.max(age, dataSet.getAccessTimestamp().getTime()); + } + } + @Override + public int compareTo(GroupWithAge that) + { + return Long.signum(this.age - that.age); + } + } +} diff --git a/datastore_server/source/java/ch/systemsx/cisd/etlserver/plugins/grouping/Grouping.java b/datastore_server/source/java/ch/systemsx/cisd/etlserver/plugins/grouping/Grouping.java index a117aa4dd32c070c060f0551980b27ba2954870d..fd15217a20a5d948cbd8b9bc6fc78ad061fef95d 100644 --- a/datastore_server/source/java/ch/systemsx/cisd/etlserver/plugins/grouping/Grouping.java +++ 
b/datastore_server/source/java/ch/systemsx/cisd/etlserver/plugins/grouping/Grouping.java @@ -22,9 +22,16 @@ import ch.systemsx.cisd.openbis.generic.shared.basic.dto.Sample; /** * @author Jakub Straszewski */ - public enum Grouping implements IGroupKeyProvider { + All + { + @Override + public String getGroupKey(AbstractExternalData dataset) + { + return "all"; + } + }, Space { @Override diff --git a/datastore_server/sourceTest/java/ch/systemsx/cisd/etlserver/plugins/AbstractAutoArchiverPolicyTestCase.java b/datastore_server/sourceTest/java/ch/systemsx/cisd/etlserver/plugins/AbstractAutoArchiverPolicyTestCase.java new file mode 100644 index 0000000000000000000000000000000000000000..273d5c260ad5efed2c59fc8f90a3e53776bc2467 --- /dev/null +++ b/datastore_server/sourceTest/java/ch/systemsx/cisd/etlserver/plugins/AbstractAutoArchiverPolicyTestCase.java @@ -0,0 +1,152 @@ +/* + * Copyright 2015 ETH Zuerich, SIS + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package ch.systemsx.cisd.etlserver.plugins; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.Date; +import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; + +import org.testng.AssertJUnit; +import org.testng.annotations.BeforeMethod; + +import ch.systemsx.cisd.common.properties.ExtendedProperties; +import ch.systemsx.cisd.openbis.generic.shared.basic.dto.AbstractExternalData; +import ch.systemsx.cisd.openbis.generic.shared.basic.dto.Code; +import ch.systemsx.cisd.openbis.generic.shared.basic.dto.DataSetType; +import ch.systemsx.cisd.openbis.generic.shared.basic.dto.Experiment; +import ch.systemsx.cisd.openbis.generic.shared.basic.dto.PhysicalDataSet; +import ch.systemsx.cisd.openbis.generic.shared.basic.dto.Project; +import ch.systemsx.cisd.openbis.generic.shared.basic.dto.Sample; +import ch.systemsx.cisd.openbis.generic.shared.basic.dto.Space; + +/** + * + * + * @author Franz-Josef Elmer + */ +public class AbstractAutoArchiverPolicyTestCase extends AssertJUnit +{ + + + protected static final class ExecutionContext + { + private AtomicInteger counter = new AtomicInteger(); + + public AbstractExternalData createDataset(String projectCode, String experimentCode, + String datasetType, String dsCode, Long size) + { + return createDataset("___space", projectCode, experimentCode, datasetType, dsCode, size); + } + + /** + * If datasetCode is null then it gets assigned unique code containing the word "generated" + */ + public AbstractExternalData createDataset(String spaceCode, String projectCode, String experimentCode, + String datasetType, String dsCode, Long accessTimestamp, Long size) + { + return createDataset(spaceCode, projectCode, experimentCode, datasetType, null, dsCode, accessTimestamp, size); + } + + /** + * If datasetCode is null then it gets assigned unique code containing the word "generated" + */ + public AbstractExternalData createDataset(String spaceCode, String projectCode, String experimentCode, + String datasetType, String dsCode, Long size) + { + return 
createDataset(spaceCode, projectCode, experimentCode, datasetType, null, dsCode, size); + } + + /** + * If datasetCode is null then it gets assigned unique code containing the word "generated" + */ + public AbstractExternalData createDataset(String spaceCode, String projectCode, String experimentCode, + String datasetType, String sampleCode, String dsCode, Long size) + { + return createDataset(spaceCode, projectCode, experimentCode, datasetType, sampleCode, dsCode, null, size); + } + + /** + * If datasetCode is null then it gets assigned unique code containing the word "generated" + */ + public AbstractExternalData createDataset(String spaceCode, String projectCode, String experimentCode, + String datasetType, String sampleCode, String dsCode, Long accessTimestamp, Long size) + { + Space space = new Space(); + space.setCode(spaceCode); + space.setIdentifier("/" + space.getCode()); + + Project project = new Project(); + project.setCode(projectCode); + project.setIdentifier("/" + spaceCode + "/" + projectCode); + project.setSpace(space); + + Experiment exp = new Experiment(); + exp.setProject(project); + exp.setCode(experimentCode); + exp.setIdentifier(project.getIdentifier() + "/" + experimentCode); + + Sample sample = null; + if (sampleCode != null) + { + sample = new Sample(); + sample.setCode(sampleCode); + sample.setIdentifier(space.getIdentifier() + "/" + sample.getCode()); + sample.setExperiment(exp); + sample.setSpace(space); + } + + DataSetType dataSetType = new DataSetType(); + dataSetType.setCode(datasetType); + + PhysicalDataSet ds = new PhysicalDataSet(); + if (dsCode != null) + { + ds.setCode(dsCode); + } + else + { + ds.setCode("generated-" + counter.incrementAndGet()); + } + ds.setExperiment(exp); + ds.setSample(sample); + ds.setSize(size); + ds.setDataSetType(dataSetType); + ds.setAccessTimestamp(accessTimestamp == null ? 
new Date(0) : new Date(accessTimestamp)); + + return ds; + } + } + + protected ExecutionContext ctx; + + @BeforeMethod + public void setUp() + { + ctx = new ExecutionContext(); + } + + protected ExtendedProperties createPolicyProperties(long min, long max) + { + ExtendedProperties props = new ExtendedProperties(); + props.setProperty(BaseGroupingPolicy.MINIMAL_ARCHIVE_SIZE, Long.toString(min)); + props.setProperty(BaseGroupingPolicy.MAXIMAL_ARCHIVE_SIZE, Long.toString(max)); + return props; + } + +} diff --git a/datastore_server/sourceTest/java/ch/systemsx/cisd/etlserver/plugins/ByExperimentPolicyTest.java b/datastore_server/sourceTest/java/ch/systemsx/cisd/etlserver/plugins/ByExperimentPolicyTest.java index 4b9ab6bf8bf076c0d3b191ff6019c3d1adbcaf0e..2c996993e4ab3707aaf67a665aa6e925f8468229 100644 --- a/datastore_server/sourceTest/java/ch/systemsx/cisd/etlserver/plugins/ByExperimentPolicyTest.java +++ b/datastore_server/sourceTest/java/ch/systemsx/cisd/etlserver/plugins/ByExperimentPolicyTest.java @@ -9,6 +9,7 @@ import ch.systemsx.cisd.common.properties.ExtendedProperties; import ch.systemsx.cisd.common.test.AssertionUtil; import ch.systemsx.cisd.etlserver.IAutoArchiverPolicy; import ch.systemsx.cisd.openbis.generic.shared.basic.dto.AbstractExternalData; +import ch.systemsx.cisd.openbis.generic.shared.basic.dto.Code; import ch.systemsx.cisd.openbis.generic.shared.basic.dto.Sample; /** @@ -37,7 +38,7 @@ public class ByExperimentPolicyTest extends ByPoliceAbstractTest List<AbstractExternalData> filtered = filter(14, 100, dataSets); - assertEquals("[ds1, ds2, ds3]", extractCodes(filtered).toString()); + assertEquals("[ds1, ds2, ds3]", Code.extractCodes(filtered).toString()); context.assertIsSatisfied(); } @@ -52,7 +53,7 @@ public class ByExperimentPolicyTest extends ByPoliceAbstractTest List<AbstractExternalData> filtered = filter(15, 25, dataSets); - assertEquals("[ds2, ds3]", extractCodes(filtered).toString()); + assertEquals("[ds2, ds3]", Code.extractCodes(filtered).toString()); context.assertIsSatisfied(); } @@ -68,7 +69,7 @@ public class ByExperimentPolicyTest extends ByPoliceAbstractTest List<AbstractExternalData> filtered = filterWithoutShuffling(6, 10, dataSets); - assertEquals("[ds1]", extractCodes(filtered).toString()); + assertEquals("[ds1]", Code.extractCodes(filtered).toString()); context.assertIsSatisfied(); } @@ -84,7 +85,7 @@ public class ByExperimentPolicyTest extends ByPoliceAbstractTest List<AbstractExternalData> filtered = filter(500, 1000, dataSets); - assertEquals("[]", extractCodes(filtered).toString()); + assertEquals("[]", Code.extractCodes(filtered).toString()); context.assertIsSatisfied(); } @@ -117,7 +118,7 @@ public class ByExperimentPolicyTest extends ByPoliceAbstractTest List<AbstractExternalData> filtered = filter(10, 1000, dataSets); - assertEquals("[ds1, ds2]", extractCodes(filtered).toString()); + assertEquals("[ds1, ds2]", Code.extractCodes(filtered).toString()); context.assertIsSatisfied(); } @@ -135,7 +136,7 @@ public class ByExperimentPolicyTest extends ByPoliceAbstractTest List<AbstractExternalData> filtered = filter(10, 1000, dataSets); - assertEquals("[ds1, ds2]", extractCodes(filtered).toString()); + assertEquals("[ds1, ds2]", Code.extractCodes(filtered).toString()); context.assertIsSatisfied(); } @@ -152,7 +153,7 @@ public class ByExperimentPolicyTest extends ByPoliceAbstractTest List<AbstractExternalData> filtered = filter(10, 1000, dataSets); - assertEquals("[ds1, ds2, ds3]", extractCodes(filtered).toString()); + assertEquals("[ds1, ds2, ds3]", 
Code.extractCodes(filtered).toString()); context.assertIsSatisfied(); } @@ -170,7 +171,7 @@ public class ByExperimentPolicyTest extends ByPoliceAbstractTest List<AbstractExternalData> filtered = filter(10, 1000, dataSets); - assertEquals("[ds3]", extractCodes(filtered).toString()); + assertEquals("[ds3]", Code.extractCodes(filtered).toString()); context.assertIsSatisfied(); } diff --git a/datastore_server/sourceTest/java/ch/systemsx/cisd/etlserver/plugins/ByPoliceAbstractTest.java b/datastore_server/sourceTest/java/ch/systemsx/cisd/etlserver/plugins/ByPoliceAbstractTest.java index 383baf59dfafc82a4bea5f45a41ca14d08383dc5..dc5184897b29301fdb9fa8eed9e7f5f92df675e7 100644 --- a/datastore_server/sourceTest/java/ch/systemsx/cisd/etlserver/plugins/ByPoliceAbstractTest.java +++ b/datastore_server/sourceTest/java/ch/systemsx/cisd/etlserver/plugins/ByPoliceAbstractTest.java @@ -20,13 +20,11 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Random; -import java.util.concurrent.atomic.AtomicInteger; import org.jmock.Expectations; import org.jmock.Mockery; import org.springframework.beans.factory.BeanFactory; import org.springframework.test.annotation.ExpectedException; -import org.testng.AssertJUnit; import org.testng.ITestResult; import org.testng.annotations.AfterMethod; import org.testng.annotations.BeforeMethod; @@ -40,34 +38,23 @@ import ch.systemsx.cisd.openbis.dss.generic.shared.ISingleDataSetPathInfoProvide import ch.systemsx.cisd.openbis.dss.generic.shared.ServiceProviderTestWrapper; import ch.systemsx.cisd.openbis.dss.generic.shared.dto.DataSetPathInfo; import ch.systemsx.cisd.openbis.generic.shared.basic.dto.AbstractExternalData; -import ch.systemsx.cisd.openbis.generic.shared.basic.dto.DataSetType; -import ch.systemsx.cisd.openbis.generic.shared.basic.dto.Experiment; -import ch.systemsx.cisd.openbis.generic.shared.basic.dto.PhysicalDataSet; -import ch.systemsx.cisd.openbis.generic.shared.basic.dto.Project; -import ch.systemsx.cisd.openbis.generic.shared.basic.dto.Sample; -import ch.systemsx.cisd.openbis.generic.shared.basic.dto.Space; +import ch.systemsx.cisd.openbis.generic.shared.basic.dto.Code; /** * @author Jakub Straszewski */ -public abstract class ByPoliceAbstractTest extends AssertJUnit +public abstract class ByPoliceAbstractTest extends AbstractAutoArchiverPolicyTestCase { protected Mockery context; - protected ExecutionContext ctx; - private IDataSetPathInfoProvider pathProviderMock; private ISingleDataSetPathInfoProvider singleDsProviderMock; - private AtomicInteger counter; - @BeforeMethod public void setUpTestEnvironment() { - ctx = new ExecutionContext(); context = new Mockery(); - counter = new AtomicInteger(); final BeanFactory beanFactory = context.mock(BeanFactory.class); ServiceProviderTestWrapper.setApplicationContext(beanFactory); pathProviderMock = ServiceProviderTestWrapper.mock(context, IDataSetPathInfoProvider.class); @@ -114,84 +101,6 @@ public abstract class ByPoliceAbstractTest extends AssertJUnit ServiceProviderTestWrapper.restoreApplicationContext(); } - protected class ExecutionContext - { - public AbstractExternalData createDataset(String projectCode, String experimentCode, String datasetType, String dsCode, Long size) - { - return createDataset("___space", projectCode, experimentCode, datasetType, dsCode, size); - } - - /** - * If datasetCode is null then it gets assigned unique code containing the word "generated" - */ - public AbstractExternalData createDataset(String spaceCode, String projectCode, 
String experimentCode, String datasetType, String dsCode, - Long size) - { - return createDataset(spaceCode, projectCode, experimentCode, datasetType, null, dsCode, size); - } - - /** - * If datasetCode is null then it gets assigned unique code containing the word "generated" - */ - public AbstractExternalData createDataset(String spaceCode, String projectCode, String experimentCode, String datasetType, String sampleCode, - String dsCode, - Long size) - { - Space space = new Space(); - space.setCode(spaceCode); - space.setIdentifier("/" + space.getCode()); - - Project project = new Project(); - project.setCode(projectCode); - project.setIdentifier("/" + spaceCode + "/" + projectCode); - project.setSpace(space); - - Experiment exp = new Experiment(); - exp.setProject(project); - exp.setCode(experimentCode); - exp.setIdentifier(project.getIdentifier() + "/" + experimentCode); - - Sample sample = null; - if (sampleCode != null) - { - sample = new Sample(); - sample.setCode(sampleCode); - sample.setIdentifier(space.getIdentifier() + "/" + sample.getCode()); - sample.setExperiment(exp); - sample.setSpace(space); - } - - DataSetType dataSetType = new DataSetType(); - dataSetType.setCode(datasetType); - - PhysicalDataSet ds = new PhysicalDataSet(); - if (dsCode != null) - { - ds.setCode(dsCode); - } - else - { - ds.setCode("generated-" + counter.incrementAndGet()); - } - ds.setExperiment(exp); - ds.setSample(sample); - ds.setSize(size); - ds.setDataSetType(dataSetType); - - return ds; - } - } - - protected List<String> extractCodes(List<AbstractExternalData> dataSets) - { - List<String> codes = new ArrayList<String>(); - for (AbstractExternalData dataSet : dataSets) - { - codes.add(dataSet.getCode()); - } - Collections.sort(codes); - return codes; - } // Some general tests for all policies @@ -201,7 +110,7 @@ public abstract class ByPoliceAbstractTest extends AssertJUnit * Creates the policy with given min and max value. To improve the robusntess of the test this method shuffles the incoming dataset and sorts the * result. */ - protected List<AbstractExternalData> filter(int min, int max, ArrayList<AbstractExternalData> dataSets) + protected List<AbstractExternalData> filter(int min, int max, List<AbstractExternalData> dataSets) { return filter(min, max, dataSets, true); } @@ -209,12 +118,12 @@ public abstract class ByPoliceAbstractTest extends AssertJUnit /** * Creates the policy with given min and max value. The input to the filtering is not shuffled. 
*/ - protected List<AbstractExternalData> filterWithoutShuffling(int min, int max, ArrayList<AbstractExternalData> dataSets) + protected List<AbstractExternalData> filterWithoutShuffling(int min, int max, List<AbstractExternalData> dataSets) { return filter(min, max, dataSets, false); } - private List<AbstractExternalData> filter(int min, int max, ArrayList<AbstractExternalData> dataSets, boolean shuffle) + private List<AbstractExternalData> filter(int min, int max, List<AbstractExternalData> dataSets, boolean shuffle) { IAutoArchiverPolicy policy = getPolicy(min, max); @@ -292,7 +201,7 @@ public abstract class ByPoliceAbstractTest extends AssertJUnit protected void assertAllDataSetsAreNotGenerated(List<AbstractExternalData> filtered) { - if (extractCodes(filtered).toString().contains("generated")) + if (Code.extractCodes(filtered).toString().contains("generated")) { fail(getErrorMessage(filtered)); } @@ -300,7 +209,7 @@ public abstract class ByPoliceAbstractTest extends AssertJUnit protected String getErrorMessage(List<AbstractExternalData> filtered) { - return "Unexpected data sets in result of filtering data sets." + extractCodes(filtered).toString(); + return "Unexpected data sets in result of filtering data sets." + Code.extractCodes(filtered).toString(); } protected void assertTotalDataSetsSize(long expectedSize, List<AbstractExternalData> dataSets) diff --git a/datastore_server/sourceTest/java/ch/systemsx/cisd/etlserver/plugins/BySpacePolicyTest.java b/datastore_server/sourceTest/java/ch/systemsx/cisd/etlserver/plugins/BySpacePolicyTest.java index fb413a73101f2c9a299074573340ebc0c6a059a9..11d0de3cc40efd1db7942a71c383c9e6f0b6420a 100644 --- a/datastore_server/sourceTest/java/ch/systemsx/cisd/etlserver/plugins/BySpacePolicyTest.java +++ b/datastore_server/sourceTest/java/ch/systemsx/cisd/etlserver/plugins/BySpacePolicyTest.java @@ -11,6 +11,7 @@ import ch.systemsx.cisd.common.properties.ExtendedProperties; import ch.systemsx.cisd.common.test.AssertionUtil; import ch.systemsx.cisd.etlserver.plugins.grouping.Grouping; import ch.systemsx.cisd.openbis.generic.shared.basic.dto.AbstractExternalData; +import ch.systemsx.cisd.openbis.generic.shared.basic.dto.Code; /** * @author Sascha Fedorenko @@ -60,7 +61,7 @@ public class BySpacePolicyTest extends ByPoliceAbstractTest List<AbstractExternalData> filtered = filter(14, 100, dataSets); - assertEquals("[ds1, ds2, ds3]", extractCodes(filtered).toString()); + assertEquals("[ds1, ds2, ds3]", Code.extractCodes(filtered).toString()); context.assertIsSatisfied(); } @@ -76,7 +77,7 @@ public class BySpacePolicyTest extends ByPoliceAbstractTest List<AbstractExternalData> filtered = filter(50, 100, dataSets); - assertEquals("[]", extractCodes(filtered).toString()); + assertEquals("[]", Code.extractCodes(filtered).toString()); context.assertIsSatisfied(); } @@ -93,7 +94,7 @@ public class BySpacePolicyTest extends ByPoliceAbstractTest List<AbstractExternalData> filtered = filter(40, 100, dataSets); - assertEquals("[ds1, ds2, ds3, ds4]", extractCodes(filtered).toString()); + assertEquals("[ds1, ds2, ds3, ds4]", Code.extractCodes(filtered).toString()); context.assertIsSatisfied(); } @@ -115,7 +116,7 @@ public class BySpacePolicyTest extends ByPoliceAbstractTest List<AbstractExternalData> filtered = filter(2, 4, dataSets); - assertEquals("[ds6, ds7, ds8]", extractCodes(filtered).toString()); + assertEquals("[ds6, ds7, ds8]", Code.extractCodes(filtered).toString()); context.assertIsSatisfied(); } @@ -190,7 +191,7 @@ public class BySpacePolicyTest extends 
ByPoliceAbstractTest List<AbstractExternalData> filtered = filter(5, 8, dataSets); - assertEquals(extractCodes(filtered).toString(), "[a, b, c, d, e, f]"); + assertEquals(Code.extractCodes(filtered).toString(), "[a, b, c, d, e, f]"); context.assertIsSatisfied(); } @@ -276,7 +277,7 @@ public class BySpacePolicyTest extends ByPoliceAbstractTest List<AbstractExternalData> filtered = filter(100, 140, dataSets); - assertEquals(extractCodes(filtered).toString(), "[a, b, c]"); + assertEquals(Code.extractCodes(filtered).toString(), "[a, b, c]"); context.assertIsSatisfied(); } @@ -354,7 +355,7 @@ public class BySpacePolicyTest extends ByPoliceAbstractTest dataSets.add(ctx.createDataset("rightSpace", "rightProject", "smallE2", "dt1", null, 60L)); dataSets.add(ctx.createDataset("rightSpace", "rightProject", "smallE3", "dt1", null, 30L)); dataSets.add(ctx.createDataset("rightSpace", "rightProject", "smallE3", "dt1", null, 30L)); - dataSets.add(ctx.createDataset("rightSpace", "rightProject", "bigE", "dt1", "toSmallSample", null, 20L)); + dataSets.add(ctx.createDataset("rightSpace", "rightProject", "bigE", "dt1", "toSmallSample", (String) null, 20L)); dataSets.add(ctx.createDataset("rightSpace", "rightProject", "bigE", "dt1", null, "a", 20L)); dataSets.add(ctx.createDataset("rightSpace", "rightProject", "bigE", "dt1", null, "b", 20L)); dataSets.add(ctx.createDataset("rightSpace", "rightProject", "bigE", "dt1", null, "c", 20L)); diff --git a/datastore_server/sourceTest/java/ch/systemsx/cisd/etlserver/plugins/GroupingPolicyTest.java b/datastore_server/sourceTest/java/ch/systemsx/cisd/etlserver/plugins/GroupingPolicyTest.java new file mode 100644 index 0000000000000000000000000000000000000000..e69a3af0db0adefd23afcaf881b82b75eb0d5b4d --- /dev/null +++ b/datastore_server/sourceTest/java/ch/systemsx/cisd/etlserver/plugins/GroupingPolicyTest.java @@ -0,0 +1,555 @@ +/* + * Copyright 2015 ETH Zuerich, SIS + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package ch.systemsx.cisd.etlserver.plugins; + +import java.lang.reflect.Method; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Date; +import java.util.List; +import java.util.Properties; + +import org.apache.commons.io.FileUtils; +import org.apache.log4j.Level; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +import ch.systemsx.cisd.common.exceptions.ConfigurationFailureException; +import ch.systemsx.cisd.common.logging.BufferedAppender; +import ch.systemsx.cisd.etlserver.IAutoArchiverPolicy; +import ch.systemsx.cisd.etlserver.plugins.grouping.Grouping; +import ch.systemsx.cisd.openbis.generic.shared.basic.BasicConstant; +import ch.systemsx.cisd.openbis.generic.shared.basic.dto.AbstractExternalData; +import ch.systemsx.cisd.openbis.generic.shared.basic.dto.Code; +import ch.systemsx.cisd.openbis.util.LogRecordingUtils; + +/** + * + * + * @author Franz-Josef Elmer + */ +public class GroupingPolicyTest extends AbstractAutoArchiverPolicyTestCase +{ + private BufferedAppender logRecorder; + + @BeforeMethod + public void setUpLogRecorder() + { + logRecorder = LogRecordingUtils.createRecorder("%-5p %c - %m%n", Level.INFO); + } + + @AfterMethod + public void afterMethod(Method method) + { + System.out.println("======= Log content for " + method.getName() + "():"); + System.out.println(logRecorder.getLogContent()); + System.out.println("======="); + logRecorder.reset(); + } + + @Test + public void testInvalidGroupingKey() + { + assertInvalidGroupingKeys("hello", "Invalid basic grouping key in property 'grouping-keys': " + + "hello (valid values are " + Arrays.asList(Grouping.values()) + ")"); + assertInvalidGroupingKeys("Space, Space:blub", "Invalid grouping key in property 'grouping-keys' " + + "because 'merge' is expected after ':': Space:blub"); + } + + private void assertInvalidGroupingKeys(String groupingKeys, String expectedExceptionMessage) + { + try + { + createPolicy(0, 1, groupingKeys); + fail("ConfigurationFailureException expected"); + } catch (ConfigurationFailureException ex) + { + assertEquals(expectedExceptionMessage, ex.getMessage()); + } + } + + @Test + public void testAllEmpty() + { + List<AbstractExternalData> dataSets = new ArrayList<AbstractExternalData>(); + + assertEquals("[]", filter(40, 100, "All", dataSets).toString()); + assertEquals("", logRecorder.getLogContent()); + } + + @Test + public void testAll() + { + List<AbstractExternalData> dataSets = new ArrayList<AbstractExternalData>(); + dataSets.add(ctx.createDataset("s1", "p1", "e3", "dt1", "ds1", 10L)); + dataSets.add(ctx.createDataset("s2", "p2", "e4", "dt2", "ds2", 10L)); + dataSets.add(ctx.createDataset("s3", "p3", "e5", "dt3", "ds3", 10L)); + dataSets.add(ctx.createDataset("s4", "p4", "e6", "dt4", "ds4", 10L)); + + List<String> filteredDataSets = filter(40, 100, "All", dataSets); + + assertLogs(groupingKeyLog("All", 4, 1), groupsMatchLog(1, 0, 0), filteredLog(filteredDataSets)); + assertEquals("[ds1, ds2, ds3, ds4]", filteredDataSets.toString()); + } + + @Test + public void testSpaceOneGroupTooSmallNoMerge() + { + List<AbstractExternalData> dataSets = new ArrayList<AbstractExternalData>(); + dataSets.add(ctx.createDataset("s1", "p1", "e1", "dt1", "ds1", 10L)); + + List<String> filteredDataSets = filter(25, 100, "Space:merge", dataSets); + + assertLogs(1, "25 bytes", "100 bytes", groupingKeyLog("Space:merge", 1, 1), groupsMatchLog(0, 1, 0)); + assertEquals("[]", 
filteredDataSets.toString()); + } + + @Test + public void testSpaceAllGroupsTooSmall() + { + List<AbstractExternalData> dataSets = new ArrayList<AbstractExternalData>(); + dataSets.add(ctx.createDataset("s1", "p1", "e1", "dt1", "ds1", 10L)); + dataSets.add(ctx.createDataset("s1", "p1", "e1", "dt1", "ds2", 10L)); + dataSets.add(ctx.createDataset("s2", "p1", "e1", "dt1", "ds3", 10L)); + dataSets.add(ctx.createDataset("s3", "p1", "e1", "dt1", "ds4", 10L)); + + List<String> filteredDataSets = filter(25, 100 * FileUtils.ONE_MB, "Space", dataSets); + + assertLogs(4, "25 bytes", "100 MB", groupingKeyLog("Space", 4, 3), groupsMatchLog(0, 3, 0)); + assertEquals("[]", filteredDataSets.toString()); + } + + @Test + public void testSpaceAllGroupsTooSmallMerge() + { + List<AbstractExternalData> dataSets = new ArrayList<AbstractExternalData>(); + dataSets.add(ctx.createDataset("s1", "p1", "e1", "dt1", "ds1", 7000L, 10L)); + dataSets.add(ctx.createDataset("s2", "p1", "e1", "dt1", "ds2", 4000L, 11L)); + dataSets.add(ctx.createDataset("s3", "p1", "e1", "dt1", "ds3", 6000L, 12L)); + dataSets.add(ctx.createDataset("s4", "p1", "e1", "dt1", "ds4", 2000L, 13L)); + + List<String> filteredDataSets = filter(25, 100 * FileUtils.ONE_MB, "Space:merge", dataSets); + + assertLogs(groupingKeyLog("Space:merge", 4, 4), groupsMatchLog(0, 4, 0), + mergedLog(3), filteredLog(filteredDataSets)); + assertEquals("[ds4, ds2, ds3]", filteredDataSets.toString()); + } + + @Test + public void testSpaceAllGroupsTooLarge() + { + List<AbstractExternalData> dataSets = new ArrayList<AbstractExternalData>(); + dataSets.add(ctx.createDataset("s1", "p1", "e1", "dt1", "ds1", 40L)); + dataSets.add(ctx.createDataset("s1", "p1", "e1", "dt1", "ds2", 70L)); + dataSets.add(ctx.createDataset("s2", "p1", "e1", "dt1", "ds3", 101L)); + dataSets.add(ctx.createDataset("s3", "p1", "e1", "dt1", "ds4", 101L)); + + List<String> filteredDataSets = filter(25, 100, "Space", dataSets); + + assertLogs(4, "25 bytes", "100 bytes", groupingKeyLog("Space", 4, 3), groupsMatchLog(0, 0, 3)); + assertEquals("[]", filteredDataSets.toString()); + } + + @Test + public void testSpaceAllGroupsTooLargeOrTooSmall() + { + List<AbstractExternalData> dataSets = new ArrayList<AbstractExternalData>(); + dataSets.add(ctx.createDataset("s1", "p1", "e1", "dt1", "ds1", 10L)); + dataSets.add(ctx.createDataset("s1", "p1", "e1", "dt1", "ds2", 10L)); + dataSets.add(ctx.createDataset("s2", "p1", "e1", "dt1", "ds3", 101L)); + dataSets.add(ctx.createDataset("s3", "p1", "e1", "dt1", "ds4", 101L)); + + List<String> filteredDataSets = filter(25, 100, "Space", dataSets); + + assertLogs(4, "25 bytes", "100 bytes", groupingKeyLog("Space", 4, 3), groupsMatchLog(0, 1, 2)); + assertEquals("[]", filteredDataSets.toString()); + } + + @Test + public void testSpaceBestGroupsJustAtMaxSize() + { + List<AbstractExternalData> dataSets = new ArrayList<AbstractExternalData>(); + dataSets.add(ctx.createDataset("s1", "p1", "e1", "dt1", "ds1", 10L)); + dataSets.add(ctx.createDataset("s1", "p1", "e1", "dt1", "ds2", 10L)); + dataSets.add(ctx.createDataset("s2", "p1", "e1", "dt1", "ds3", 50L)); + dataSets.add(ctx.createDataset("s2", "p1", "e1", "dt1", "ds4", 50L)); + dataSets.add(ctx.createDataset("s3", "p1", "e1", "dt1", "ds5", 101L)); + + List<String> filteredDataSets = filter(25, 100, "Space", dataSets); + + assertLogs(groupingKeyLog("Space", 5, 3), groupsMatchLog(1, 1, 1), filteredLog(filteredDataSets)); + assertEquals("[ds3, ds4]", filteredDataSets.toString()); + } + + @Test + public void testSpaceOldestDataSet() 
+ { + List<AbstractExternalData> dataSets = new ArrayList<AbstractExternalData>(); + dataSets.add(ctx.createDataset("s1", "p1", "e1", "dt1", "ds1", 10L)); + dataSets.add(ctx.createDataset("s1", "p1", "e1", "dt2", "ds2", 10L)); + dataSets.add(ctx.createDataset("s2", "p1", "e1", "dt3", "ds3", 200000L, 50L)); + dataSets.add(ctx.createDataset("s3", "p1", "e1", "dt4", "ds4", 100000L, 70L)); + dataSets.add(ctx.createDataset("s4", "p1", "e1", "dt5", "ds5", 101L)); + + List<String> filteredDataSets = filter(25, 100, "Space", dataSets); + + assertLogs(groupingKeyLog("Space", 5, 4), groupsMatchLog(2, 1, 1), oldestLog(100000L), + filteredLog(filteredDataSets)); + assertEquals("[ds4]", filteredDataSets.toString()); + } + + @Test + public void testSpaceSingleton() + { + List<AbstractExternalData> dataSets = new ArrayList<AbstractExternalData>(); + dataSets.add(ctx.createDataset("s2", "p1", "e1", "dt3", "ds3", 50L)); + + List<String> filteredDataSets = filter(25, 100, "Space", dataSets); + + assertLogs(groupingKeyLog("Space", 1, 1), groupsMatchLog(1, 0, 0), filteredLog(filteredDataSets)); + assertEquals("[ds3]", filteredDataSets.toString()); + } + + @Test + public void testProjectDataSetTypeAllGroupsTooSmall() + { + List<AbstractExternalData> dataSets = new ArrayList<AbstractExternalData>(); + dataSets.add(ctx.createDataset("s1", "p1", "e3", "dt1", "ds1", 10L)); + dataSets.add(ctx.createDataset("s1", "p1", "e3", "dt1", "ds2", 10L)); + dataSets.add(ctx.createDataset("s1", "p1", "e3", "dt2", "ds3", 10L)); + dataSets.add(ctx.createDataset("s1", "p1", "e3", "dt2", "ds4", 10L)); + + List<String> filteredDataSets = filter(25, 100, "Project#DataSetType", dataSets); + + assertLogs(4, "25 bytes", "100 bytes", groupingKeyLog("Project#DataSetType", 4, 2), + groupsMatchLog(0, 2, 0)); + assertEquals("[]", filteredDataSets.toString()); + } + + @Test + public void testCombinationProjectAndDataSetType() + { + List<AbstractExternalData> dataSets = new ArrayList<AbstractExternalData>(); + dataSets.add(ctx.createDataset("s1", "p1", "e1", "dt1", "ds1", 20000L, 71L)); + dataSets.add(ctx.createDataset("s1", "p2", "e1", "dt1", "ds2", 10000L, 42L)); + dataSets.add(ctx.createDataset("s1", "p2", "e1", "dt1", "ds3", 30000L, 42L)); + dataSets.add(ctx.createDataset("s1", "p1", "e1", "dt2", "ds4", 15000L, 73L)); + dataSets.add(ctx.createDataset("s1", "p2", "e1", "dt2", "ds5", 40000L, 74L)); + + List<String> filteredDataSets = filter(25, 100, "Project#DataSetType", dataSets); + + assertLogs(groupingKeyLog("Project#DataSetType", 5, 4), groupsMatchLog(4, 0, 0), oldestLog(15000), + filteredLog(filteredDataSets)); + assertEquals("[ds4]", filteredDataSets.toString()); + } + + @Test + public void testProjectAllGroupsTooLargeButSequenceProjectAndProjectDataSetTypeFits() + { + List<AbstractExternalData> dataSets = new ArrayList<AbstractExternalData>(); + dataSets.add(ctx.createDataset("s1", "p1", "e1", "dt1", "ds1", 20000L, 71L)); + dataSets.add(ctx.createDataset("s1", "p2", "e1", "dt1", "ds2", 10000L, 42L)); + dataSets.add(ctx.createDataset("s1", "p2", "e1", "dt1", "ds3", 30000L, 42L)); + dataSets.add(ctx.createDataset("s1", "p1", "e1", "dt2", "ds4", 15000L, 73L)); + dataSets.add(ctx.createDataset("s1", "p2", "e1", "dt2", "ds5", 40000L, 74L)); + + assertEquals("[]", filter(25, 100, "Project", dataSets).toString()); + + logRecorder.resetLogContent(); + List<String> filteredDataSets = filter(25, 100, "Project, Project#DataSetType", dataSets); + + assertLogs(groupingKeyLog("Project", 5, 2), groupsMatchLog(0, 0, 2), + 
groupingKeyLog("Project#DataSetType", 5, 4), groupsMatchLog(4, 0, 0), oldestLog(15000), + filteredLog(filteredDataSets)); + assertEquals("[ds4]", filteredDataSets.toString()); + } + + @Test + public void testByExperimentTooSmall() + { + List<AbstractExternalData> dataSets = new ArrayList<AbstractExternalData>(); + dataSets.add(ctx.createDataset("s1", "p1", "e1", "dt1", "ds1", 11L)); + dataSets.add(ctx.createDataset("s1", "p1", "e2", "dt1", "ds2", 12L)); + dataSets.add(ctx.createDataset("s1", "p1", "e3", "dt1", "ds3", 15L)); + dataSets.add(ctx.createDataset("s1", "p1", "e1", "dt2", "ds4", 13L)); + assertEquals("[]", filter(20, 50, "DataSetType#Experiment", dataSets).toString()); + logRecorder.resetLogContent(); + + List<String> filteredDataSets = filterByExperiment(20, 50, dataSets); + + assertLogs(groupingKeyLog("DataSetType#Experiment", 4, 4), groupsMatchLog(0, 4, 0), + groupingKeyLog("DataSetType#Project", 4, 2), groupsMatchLog(1, 1, 0), + filteredLog(filteredDataSets)); + assertEquals("[ds1, ds2, ds3]", filteredDataSets.toString()); + } + + @Test + public void testByExperimentTooLarge() + { + List<AbstractExternalData> dataSets = new ArrayList<AbstractExternalData>(); + dataSets.add(ctx.createDataset("s1", "p1", "e1", "dt1", "smp1", "ds1", 19L)); + dataSets.add(ctx.createDataset("s1", "p1", "e1", "dt1", "smp1", "ds2", 10L)); + dataSets.add(ctx.createDataset("s1", "p1", "e1", "dt1", "smp2", "ds3", 18L)); + dataSets.add(ctx.createDataset("s1", "p1", "e1", "dt2", "smp1", "ds4", 10L)); + + List<String> filteredDataSets = filterByExperiment(20, 30, dataSets); + + assertLogs(groupingKeyLog("DataSetType#Experiment", 4, 2), groupsMatchLog(0, 1, 1), + groupingKeyLog("DataSetType#Project", 4, 2), groupsMatchLog(0, 1, 1), + groupingKeyLog("DataSetType#Experiment#Sample", 4, 3), groupsMatchLog(1, 2, 0), + filteredLog(filteredDataSets)); + assertEquals("[ds1, ds2]", filteredDataSets.toString()); + } + + @Test + public void testBySpaceTooSmall() + { + List<AbstractExternalData> dataSets = new ArrayList<AbstractExternalData>(); + dataSets.add(ctx.createDataset("s1", "p1", "e1", "dt1", "smp1", "ds1", 6 * FileUtils.ONE_KB)); + dataSets.add(ctx.createDataset("s1", "p1", "e1", "dt1", "smp1", "ds2", 2 * FileUtils.ONE_KB)); + dataSets.add(ctx.createDataset("s2", "p1", "e1", "dt1", "smp2", "ds3", 8 * FileUtils.ONE_KB)); + dataSets.add(ctx.createDataset("s1", "p1", "e1", "dt2", "smp1", "ds4", 1 * FileUtils.ONE_KB)); + + List<String> filteredDataSets = filterBySpace(20 * FileUtils.ONE_KB, 30 * FileUtils.ONE_KB, dataSets); + + assertLogs(4, "20 KB", "30 KB", groupingKeyLog("DataSetType#Space", 4, 3), groupsMatchLog(0, 3, 0), + groupingKeyLog("DataSetType#Project:merge", 4, 3), groupsMatchLog(0, 3, 0), + mergedTooSmallLog(3, "17 KB", "20 KB"), + groupingKeyLog("DataSetType#Experiment:merge", 4, 3), groupsMatchLog(0, 3, 0), + mergedTooSmallLog(3, "17 KB", "20 KB"), + groupingKeyLog("DataSetType#Experiment#Sample:merge", 4, 3), groupsMatchLog(0, 3, 0), + mergedTooSmallLog(3, "17 KB", "20 KB"), + groupingKeyLog("DataSet:merge", 4, 4), groupsMatchLog(0, 4, 0), + mergedTooSmallLog(4, "17 KB", "20 KB")); + assertEquals("[]", filteredDataSets.toString()); + } + + @Test + public void testBySpaceTooBigButProjectLevelFits() + { + List<AbstractExternalData> dataSets = new ArrayList<AbstractExternalData>(); + dataSets.add(ctx.createDataset("s1", "p1", "e1", "dt2", "smp1", "ds1", 10L)); + dataSets.add(ctx.createDataset("s1", "p1", "e1", "dt1", "smp1", "ds2", 11L)); + dataSets.add(ctx.createDataset("s1", "p1", "e1", "dt1", 
"smp1", "ds3", 12L)); + dataSets.add(ctx.createDataset("s1", "p2", "e1", "dt1", "smp1", "ds4", 13L)); + dataSets.add(ctx.createDataset("s2", "p1", "e1", "dt1", "smp2", "ds5", 14L)); + + List<String> filteredDataSets = filterBySpace(20, 30, dataSets); + + assertLogs(groupingKeyLog("DataSetType#Space", 5, 3), groupsMatchLog(0, 2, 1), + groupingKeyLog("DataSetType#Project:merge", 5, 4), groupsMatchLog(1, 3, 0), + filteredLog(filteredDataSets)); + assertEquals("[ds2, ds3]", filteredDataSets.toString()); + } + + @Test + public void testBySpaceTooBigOrTooSmallProjectLevelTooSmallButProjectGroupsMerged() + { + List<AbstractExternalData> dataSets = new ArrayList<AbstractExternalData>(); + dataSets.add(ctx.createDataset("s1", "p1", "e1", "dt2", "smp1", "ds1", 10L)); + dataSets.add(ctx.createDataset("s1", "p1", "e1", "dt1", "smp1", "ds2", 11L)); + dataSets.add(ctx.createDataset("s1", "p3", "e1", "dt1", "smp1", "ds3", 12L)); + dataSets.add(ctx.createDataset("s1", "p2", "e1", "dt1", "smp1", "ds4", 13L)); + dataSets.add(ctx.createDataset("s2", "p1", "e1", "dt1", "smp2", "ds5", 14L)); + + List<String> filteredDataSets = filterBySpace(20, 30, dataSets); + + assertLogs(groupingKeyLog("DataSetType#Space", 5, 3), groupsMatchLog(0, 2, 1), + groupingKeyLog("DataSetType#Project:merge", 5, 5), groupsMatchLog(0, 5, 0), + mergedLog(2), filteredLog(filteredDataSets)); + assertEquals("[ds1, ds2]", filteredDataSets.toString()); + } + + @Test + public void testBySpaceTooBigButExperimentLevelFits() + { + List<AbstractExternalData> dataSets = new ArrayList<AbstractExternalData>(); + dataSets.add(ctx.createDataset("s1", "p1", "e1", "dt1", "smp1", "ds1", 10L)); + dataSets.add(ctx.createDataset("s1", "p1", "e2", "dt1", "smp1", "ds2", 11L)); + dataSets.add(ctx.createDataset("s1", "p1", "e2", "dt1", "smp1", "ds3", 11L)); + dataSets.add(ctx.createDataset("s1", "p2", "e1", "dt2", "smp1", "ds4", 12L)); + dataSets.add(ctx.createDataset("s1", "p2", "e2", "dt2", "smp1", "ds5", 13L)); + dataSets.add(ctx.createDataset("s1", "p2", "e2", "dt2", "smp2", "ds6", 14L)); + + List<String> filteredDataSets = filterBySpace(20, 30, dataSets); + + assertLogs(groupingKeyLog("DataSetType#Space", 6, 2), groupsMatchLog(0, 0, 2), + groupingKeyLog("DataSetType#Project:merge", 6, 2), groupsMatchLog(0, 0, 2), + groupingKeyLog("DataSetType#Experiment:merge", 6, 4), groupsMatchLog(2, 2, 0), + oldestLog(0), filteredLog(filteredDataSets)); + assertEquals("[ds2, ds3]", filteredDataSets.toString()); + } + + @Test + public void testBySpaceTooBigExperimentLevelTooSmallAndMergedTooBig() + { + List<AbstractExternalData> dataSets = new ArrayList<AbstractExternalData>(); + dataSets.add(ctx.createDataset("s1", "p1", "e1", "dt1", "smp1", "ds1", 15L)); + dataSets.add(ctx.createDataset("s1", "p1", "e2", "dt1", "smp1", "ds2", 18L)); + dataSets.add(ctx.createDataset("s1", "p1", "e2", "dt1", "smp2", "ds3", 19L)); + dataSets.add(ctx.createDataset("s1", "p2", "e1", "dt2", "smp1", "ds4", 16L)); + dataSets.add(ctx.createDataset("s1", "p2", "e2", "dt2", "smp1", "ds5", 18L)); + dataSets.add(ctx.createDataset("s1", "p2", "e2", "dt2", "smp2", "ds6", 19L)); + + List<String> filteredDataSets = filterBySpace(20, 30, dataSets); + + assertLogs(6, "20 bytes", "30 bytes", groupingKeyLog("DataSetType#Space", 6, 2), groupsMatchLog(0, 0, 2), + groupingKeyLog("DataSetType#Project:merge", 6, 2), groupsMatchLog(0, 0, 2), + groupingKeyLog("DataSetType#Experiment:merge", 6, 4), groupsMatchLog(0, 2, 2), + mergedTooLargeLog(2, "31 bytes", "30 bytes"), + 
groupingKeyLog("DataSetType#Experiment#Sample:merge", 6, 6), groupsMatchLog(0, 6, 0), + mergedTooLargeLog(2, "31 bytes", "30 bytes"), + groupingKeyLog("DataSet:merge", 6, 6), groupsMatchLog(0, 6, 0), + mergedTooLargeLog(2, "31 bytes", "30 bytes")); + assertEquals("[]", filteredDataSets.toString()); + } + + @Test + public void testBySpaceTooBigButSampleLevelFits() + { + List<AbstractExternalData> dataSets = new ArrayList<AbstractExternalData>(); + dataSets.add(ctx.createDataset("s1", "p1", "e1", "dt1", "smp1", "ds1", 22000L, 20L)); + dataSets.add(ctx.createDataset("s1", "p1", "e1", "dt1", "smp2", "ds2", 24000L, 21L)); + dataSets.add(ctx.createDataset("s1", "p1", "e2", "dt1", "smp1", "ds3", 26000L, 22L)); + dataSets.add(ctx.createDataset("s1", "p1", "e2", "dt1", "smp2", "ds4", 28000L, 23L)); + dataSets.add(ctx.createDataset("s1", "p2", "e1", "dt2", "smp1", "ds5", 21000L, 24L)); + dataSets.add(ctx.createDataset("s1", "p2", "e1", "dt2", "smp2", "ds6", 23000L, 25L)); + dataSets.add(ctx.createDataset("s1", "p2", "e2", "dt2", "smp1", "ds7", 25000L, 26L)); + dataSets.add(ctx.createDataset("s1", "p2", "e2", "dt2", "smp2", "ds8", 27000L, 27L)); + + List<String> filteredDataSets = filterBySpace(20, 30, dataSets); + + assertLogs(groupingKeyLog("DataSetType#Space", 8, 2), groupsMatchLog(0, 0, 2), + groupingKeyLog("DataSetType#Project:merge", 8, 2), groupsMatchLog(0, 0, 2), + groupingKeyLog("DataSetType#Experiment:merge", 8, 4), groupsMatchLog(0, 0, 4), + groupingKeyLog("DataSetType#Experiment#Sample:merge", 8, 8), groupsMatchLog(8, 0, 0), + oldestLog(21000), filteredLog(filteredDataSets)); + assertEquals("[ds5]", filteredDataSets.toString()); + } + + @Test + public void testBySpaceTooBigButDataSetLevelFits() + { + List<AbstractExternalData> dataSets = new ArrayList<AbstractExternalData>(); + dataSets.add(ctx.createDataset("s1", "p1", "e1", "dt1", "smp1", "ds1", 19L)); + dataSets.add(ctx.createDataset("s1", "p1", "e1", "dt1", "smp1", "ds2", 19L)); + dataSets.add(ctx.createDataset("s1", "p1", "e1", "dt1", "smp2", "ds3", 20L)); + dataSets.add(ctx.createDataset("s1", "p1", "e1", "dt1", "smp2", "ds4", 19L)); + + List<String> filteredDataSets = filterBySpace(20, 30, dataSets); + + assertLogs(groupingKeyLog("DataSetType#Space", 4, 1), groupsMatchLog(0, 0, 1), + groupingKeyLog("DataSetType#Project:merge", 4, 1), groupsMatchLog(0, 0, 1), + groupingKeyLog("DataSetType#Experiment:merge", 4, 1), groupsMatchLog(0, 0, 1), + groupingKeyLog("DataSetType#Experiment#Sample:merge", 4, 2), groupsMatchLog(0, 0, 2), + groupingKeyLog("DataSet:merge", 4, 4), groupsMatchLog(1, 3, 0), + filteredLog(filteredDataSets)); + assertEquals("[ds3]", filteredDataSets.toString()); + } + + private List<String> filterBySpace(long min, long max, List<AbstractExternalData> dataSets) + { + return filter(min, max, "DataSetType#Space, DataSetType#Project:merge, DataSetType#Experiment:merge, " + + "DataSetType#Experiment#Sample:merge, DataSet:merge", dataSets); + } + + private List<String> filterByExperiment(long min, long max, List<AbstractExternalData> dataSets) + { + return filter(min, max, "DataSetType#Experiment, DataSetType#Project, DataSetType#Experiment#Sample", dataSets); + } + + private List<String> filter(long min, long max, String groupingKeys, List<AbstractExternalData> dataSets) + { + return Code.extractCodes(createPolicy(min, max, groupingKeys).filter(dataSets)); + } + + private IAutoArchiverPolicy createPolicy(long min, long max, String groupingKeys) + { + Properties properties = createPolicyProperties(min, max); + 
properties.setProperty(GroupingPolicy.GROUPING_KEYS_KEY, groupingKeys); + return new GroupingPolicy(properties); + } + + private String groupingKeyLog(String key, int dataSets, int groups) + { + return String.format("Grouping key: '%s' has grouped %d data sets into %d groups.", key, dataSets, groups); + } + + private String groupsMatchLog(int match, int tooSmall, int tooLarge) + { + return String.format("%d groups match in size, %d groups are too small and %d groups are too large.", + match, tooSmall, tooLarge); + } + + private String oldestLog(long timestamp) + { + return String.format("All data sets have been accessed at %s or before.", + new SimpleDateFormat(BasicConstant.DATE_WITHOUT_TIMEZONE_PATTERN).format(new Date(timestamp))); + } + + private String mergedLog(int groups) + { + return groups + " groups have been merged."; + } + + private String mergedTooSmallLog(int groups, String size, String minSize) + { + return "Merging all " + groups + " groups gives a total size of " + size + + " which is still below required minimum of " + minSize; + } + + private String mergedTooLargeLog(int groups, String size, String maxSize) + { + return groups + " groups have been merged, but the total size of " + size + + " is above the required maximum of " + maxSize; + } + + private String filteredLog(Object filteredDataSets) + { + return String.format("filtered data sets: %s", filteredDataSets); + } + + private void assertLogs(int dataSets, String minSize, String maxSize, String...expectedLogEntries) + { + StringBuilder builder = createBasicLogExpectation(expectedLogEntries); + builder.append(String.format("WARN NOTIFY.GroupingPolicy - " + + "From %d data sets no group could be found to be fit between %s and %s\n\nLog:\n", + dataSets, minSize, maxSize)); + for (String logEntry : expectedLogEntries) + { + builder.append(logEntry).append('\n'); + } + assertEquals(builder.toString().trim(), logRecorder.getLogContent()); + } + + private void assertLogs(String...expectedLogEntries) + { + assertEquals(createBasicLogExpectation(expectedLogEntries).toString().trim(), logRecorder.getLogContent()); + } + + private StringBuilder createBasicLogExpectation(String... expectedLogEntries) + { + StringBuilder builder = new StringBuilder(); + for (String logEntry : expectedLogEntries) + { + builder.append("INFO OPERATION.GroupingPolicy - ").append(logEntry).append('\n'); + } + return builder; + } +}
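A minimal sketch of how the new GroupingPolicy could be configured. The property keys ("minimal-archive-size", "maximal-archive-size", "grouping-keys"), the grouping-key syntax and the Grouping values are taken from the classes in this change; the example class name, the main() harness, the concrete sizes and the key sequence are illustrative only, and the sketch assumes the policy can be instantiated outside a running DSS, as GroupingPolicyTest does.

package ch.systemsx.cisd.etlserver.plugins;

import java.util.Properties;

import org.apache.commons.io.FileUtils;

// Hypothetical illustration class, not part of this change set.
public class GroupingPolicyConfigExample
{
    public static void main(String[] args)
    {
        Properties properties = new Properties();
        // Size interval checked by BaseGroupingPolicy (illustrative values).
        properties.setProperty(BaseGroupingPolicy.MINIMAL_ARCHIVE_SIZE, Long.toString(20 * FileUtils.ONE_GB));
        properties.setProperty(BaseGroupingPolicy.MAXIMAL_ARCHIVE_SIZE, Long.toString(100 * FileUtils.ONE_GB));
        // Grouping keys are tried from left to right. '#' combines basic groupings
        // (values of the Grouping enum), ':merge' allows too-small groups of that
        // grouping key to be merged, oldest groups first.
        properties.setProperty(GroupingPolicy.GROUPING_KEYS_KEY,
                "DataSetType#Space, DataSetType#Project:merge, DataSetType#Experiment:merge");

        GroupingPolicy policy = new GroupingPolicy(properties);
        // In a DSS setup the auto archiver would call policy.filter(dataSets) with the
        // archiving candidates and archive whatever group is returned.
        System.out.println("Configured " + policy.getClass().getSimpleName() + " with " + properties);
    }
}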
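The group selection implemented in filterDataSetsWithSizes(), tryFindGroupOrMerge() and tryMerge() can also be summarized without openBIS types. The sketch below is a hypothetical stand-alone illustration (class, method and field names are not part of this change) that mirrors the decision order: groups whose cumulated size falls into [min, max] are preferred, among several fitting groups the one whose most recent access is oldest wins, and only grouping keys marked ':merge' may combine too-small groups, oldest first.

package ch.systemsx.cisd.etlserver.plugins;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;

// Hypothetical stand-alone illustration of the decision order in GroupingPolicy,
// using plain numbers instead of DatasetListWithTotal groups.
public class GroupSelectionSketch
{
    static final class Group
    {
        final long size;          // cumulated size of the group
        final long latestAccess;  // latest access timestamp of any data set in the group

        Group(long size, long latestAccess)
        {
            this.size = size;
            this.latestAccess = latestAccess;
        }
    }

    private static final Comparator<Group> BY_AGE = new Comparator<Group>()
        {
            @Override
            public int compare(Group g1, Group g2)
            {
                return Long.signum(g1.latestAccess - g2.latestAccess);
            }
        };

    /** Returns the selected groups, or null if the next grouping key should be tried. */
    static List<Group> select(List<Group> groups, long min, long max, boolean merge)
    {
        List<Group> fitting = new ArrayList<Group>();
        List<Group> tooSmall = new ArrayList<Group>();
        for (Group group : groups)
        {
            if (group.size < min)
            {
                tooSmall.add(group);
            } else if (group.size <= max)
            {
                fitting.add(group);
            } // groups larger than max are ignored for this grouping key
        }
        if (fitting.isEmpty() == false)
        {
            // Several fitting groups: take the one whose most recent access is the oldest.
            Collections.sort(fitting, BY_AGE);
            return Collections.singletonList(fitting.get(0));
        }
        if (merge == false || tooSmall.size() < 2)
        {
            return null;
        }
        // ':merge' keys only: combine too-small groups, oldest first, until min is reached.
        Collections.sort(tooSmall, BY_AGE);
        List<Group> merged = new ArrayList<Group>();
        long total = 0;
        for (Group group : tooSmall)
        {
            merged.add(group);
            total += group.size;
            if (total >= min)
            {
                return total <= max ? merged : null; // null: merged result overshoots max
            }
        }
        return null; // even all too-small groups together stay below min
    }
}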