diff --git a/datastore_server/source/java/ch/systemsx/cisd/etlserver/IArchiveCandidateDiscoverer.java b/datastore_server/source/java/ch/systemsx/cisd/etlserver/IArchiveCandidateDiscoverer.java new file mode 100644 index 0000000000000000000000000000000000000000..a63dc9fb357c8a55fb206db9d3df3b138d7bd6b7 --- /dev/null +++ b/datastore_server/source/java/ch/systemsx/cisd/etlserver/IArchiveCandidateDiscoverer.java @@ -0,0 +1,26 @@ +package ch.systemsx.cisd.etlserver; + +import java.util.List; + +import ch.systemsx.cisd.openbis.dss.generic.shared.IEncapsulatedOpenBISService; +import ch.systemsx.cisd.openbis.generic.shared.basic.dto.AbstractExternalData; +import ch.systemsx.cisd.openbis.generic.shared.basic.dto.ArchiverDataSetCriteria; + +/** + * Finds data sets that are possible candidates for archiving. The implementing class must have a constructor accepting a single parameter of type + * {@link java.util.Properties}. + * + * @author Sascha Fedorenko + */ +public interface IArchiveCandidateDiscoverer +{ + /** + * Returns a list of data sets that can be scheduled for archiving. This method is called periodically, so there is no need to return everything in one + * list. The first suitable subset is sufficient; make sure, though, that older data is returned first. 
+ * + * @param openbis an interface to search data sets with + * @param criteria general time and type criteria to start with + * @return list of data sets that the auto archiver can process + */ + List<AbstractExternalData> findDatasetsForArchiving(IEncapsulatedOpenBISService openbis, ArchiverDataSetCriteria criteria); +} diff --git a/datastore_server/source/java/ch/systemsx/cisd/etlserver/plugins/AgeArchiveCandidateDiscoverer.java b/datastore_server/source/java/ch/systemsx/cisd/etlserver/plugins/AgeArchiveCandidateDiscoverer.java new file mode 100644 index 0000000000000000000000000000000000000000..ff16358e0f33849bca850ee975d86a58b63d71d3 --- /dev/null +++ b/datastore_server/source/java/ch/systemsx/cisd/etlserver/plugins/AgeArchiveCandidateDiscoverer.java @@ -0,0 +1,23 @@ +package ch.systemsx.cisd.etlserver.plugins; + +import java.util.List; + +import ch.systemsx.cisd.etlserver.IArchiveCandidateDiscoverer; +import ch.systemsx.cisd.openbis.dss.generic.shared.IEncapsulatedOpenBISService; +import ch.systemsx.cisd.openbis.generic.shared.basic.dto.AbstractExternalData; +import ch.systemsx.cisd.openbis.generic.shared.basic.dto.ArchiverDataSetCriteria; + +/** + * Default archive candidate discoverer: applies no selection logic of its own and simply returns whatever {@code listAvailableDataSets(criteria)} of the given openBIS service yields for the supplied criteria. + * + * @author Sascha Fedorenko + */ +public class AgeArchiveCandidateDiscoverer implements IArchiveCandidateDiscoverer +{ + @Override + public List<AbstractExternalData> findDatasetsForArchiving(IEncapsulatedOpenBISService openBISService, ArchiverDataSetCriteria criteria) + { + return openBISService.listAvailableDataSets(criteria); + } + +} diff --git a/datastore_server/source/java/ch/systemsx/cisd/etlserver/plugins/AutoArchiverTask.java b/datastore_server/source/java/ch/systemsx/cisd/etlserver/plugins/AutoArchiverTask.java index b3187d48cae7ea7e86a828ec3dfacba437d758cc..56d75cbceba36c6e08ee2633b3c5515866b16dce 100644 --- 
a/datastore_server/source/java/ch/systemsx/cisd/etlserver/plugins/AutoArchiverTask.java +++ b/datastore_server/source/java/ch/systemsx/cisd/etlserver/plugins/AutoArchiverTask.java @@ -28,15 +28,16 @@ import ch.systemsx.cisd.common.logging.LogCategory; import ch.systemsx.cisd.common.logging.LogFactory; import ch.systemsx.cisd.common.maintenance.IMaintenanceTask; import ch.systemsx.cisd.common.properties.PropertyParametersUtil; -import ch.systemsx.cisd.common.properties.PropertyUtils; import ch.systemsx.cisd.common.properties.PropertyParametersUtil.SectionProperties; +import ch.systemsx.cisd.common.properties.PropertyUtils; import ch.systemsx.cisd.common.reflection.ClassUtils; +import ch.systemsx.cisd.etlserver.IArchiveCandidateDiscoverer; import ch.systemsx.cisd.etlserver.IAutoArchiverPolicy; import ch.systemsx.cisd.openbis.dss.generic.shared.IEncapsulatedOpenBISService; import ch.systemsx.cisd.openbis.dss.generic.shared.ServiceProvider; +import ch.systemsx.cisd.openbis.generic.shared.basic.dto.AbstractExternalData; import ch.systemsx.cisd.openbis.generic.shared.basic.dto.ArchiverDataSetCriteria; import ch.systemsx.cisd.openbis.generic.shared.basic.dto.Code; -import ch.systemsx.cisd.openbis.generic.shared.basic.dto.AbstractExternalData; /** * {@link IMaintenanceTask} performing automatic archiving of data sets. 
@@ -52,6 +53,8 @@ public class AutoArchiverTask implements IMaintenanceTask private static final String POLICY_SECTION_NAME = "policy"; + private static final String DISCOVERY_SECTION_NAME = "archive-candidate-discoverer"; + private static final String CLASS_PROPERTY_NAME = "class"; private static final String DATA_SET_TYPE_PROPERTY_NAME = "data-set-type"; @@ -66,15 +69,18 @@ public class AutoArchiverTask implements IMaintenanceTask private IAutoArchiverPolicy policy; + private IArchiveCandidateDiscoverer archiveCandidateDiscoverer; + private ArchiverDataSetCriteria criteria; private boolean removeFromDataStore; + @Override public void execute() { operationLog.info("start"); - List<AbstractExternalData> dataSets = policy.filter(openBISService.listAvailableDataSets(criteria)); + List<AbstractExternalData> dataSets = policy.filter(archiveCandidateDiscoverer.findDatasetsForArchiving(openBISService, criteria)); if (dataSets.isEmpty()) { operationLog.info("nothing to archive"); @@ -97,6 +103,11 @@ public class AutoArchiverTask implements IMaintenanceTask POLICY_SECTION_NAME, false); policy = createPolicyInstance(policySectionProperties); + SectionProperties discoverySectionProperties = + PropertyParametersUtil.extractSingleSectionProperties(properties, + DISCOVERY_SECTION_NAME, false); + archiveCandidateDiscoverer = createArchiveDatasetDiscoverer(discoverySectionProperties); + removeFromDataStore = PropertyUtils.getBoolean(properties, REMOVE_DATASETS_FROM_STORE, false); @@ -111,6 +122,17 @@ public class AutoArchiverTask implements IMaintenanceTask return new ArchiverDataSetCriteria(olderThan, dataSetTypeCodeOrNull, false); } + private IArchiveCandidateDiscoverer createArchiveDatasetDiscoverer(SectionProperties discoverySectionProperties) + { + String className = discoverySectionProperties.getProperties().getProperty(CLASS_PROPERTY_NAME); + if (className == null) + { + return new AgeArchiveCandidateDiscoverer(); + } + + return createInstance(discoverySectionProperties, 
className, IArchiveCandidateDiscoverer.class); + } + private IAutoArchiverPolicy createPolicyInstance(SectionProperties policySectionProperties) { String className = policySectionProperties.getProperties().getProperty(CLASS_PROPERTY_NAME); @@ -118,16 +140,23 @@ public class AutoArchiverTask implements IMaintenanceTask { return DummyAutoArchiverPolicy.INSTANCE; } + + return createInstance(policySectionProperties, className, IAutoArchiverPolicy.class); + } + + private static <T> T createInstance(SectionProperties constructorArguments, String className, + Class<T> interfaceToCreate) + { try { - return ClassUtils.create(IAutoArchiverPolicy.class, className, policySectionProperties + return ClassUtils.create(interfaceToCreate, className, constructorArguments .getProperties()); } catch (ConfigurationFailureException ex) { throw ex; // rethrow the exception without changing the message } catch (Exception ex) { - throw new ConfigurationFailureException("Cannot find the policy class '" + className + throw new ConfigurationFailureException("Cannot find the class '" + className + "'", CheckedExceptionTunnel.unwrapIfNecessary(ex)); } } diff --git a/datastore_server/source/java/ch/systemsx/cisd/openbis/dss/generic/server/plugins/standard/archiver/ByExpermientPolicy.java b/datastore_server/source/java/ch/systemsx/cisd/openbis/dss/generic/server/plugins/standard/archiver/ByExpermientPolicy.java new file mode 100644 index 0000000000000000000000000000000000000000..4f9a019cace7eb3667eda060e5f4b1ccccc34377 --- /dev/null +++ b/datastore_server/source/java/ch/systemsx/cisd/openbis/dss/generic/server/plugins/standard/archiver/ByExpermientPolicy.java @@ -0,0 +1,206 @@ +package ch.systemsx.cisd.openbis.dss.generic.server.plugins.standard.archiver; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; + +import org.apache.log4j.Logger; + +import ch.systemsx.cisd.common.logging.LogCategory; +import 
ch.systemsx.cisd.common.logging.LogFactory; +import ch.systemsx.cisd.common.properties.ExtendedProperties; +import ch.systemsx.cisd.common.properties.PropertyUtils; +import ch.systemsx.cisd.etlserver.IAutoArchiverPolicy; +import ch.systemsx.cisd.openbis.dss.generic.server.plugins.standard.archiver.grouping.DataSetTypeGroup; +import ch.systemsx.cisd.openbis.dss.generic.server.plugins.standard.archiver.grouping.DatasetListWithTotal; +import ch.systemsx.cisd.openbis.dss.generic.server.plugins.standard.archiver.grouping.ExperimentGroup; +import ch.systemsx.cisd.openbis.dss.generic.server.plugins.standard.archiver.grouping.Grouper; +import ch.systemsx.cisd.openbis.dss.generic.server.plugins.standard.archiver.grouping.ProjectGroup; +import ch.systemsx.cisd.openbis.dss.generic.server.plugins.standard.archiver.grouping.TreeNode; +import ch.systemsx.cisd.openbis.dss.generic.shared.IDataSetPathInfoProvider; +import ch.systemsx.cisd.openbis.dss.generic.shared.ISingleDataSetPathInfoProvider; +import ch.systemsx.cisd.openbis.dss.generic.shared.ServiceProvider; +import ch.systemsx.cisd.openbis.generic.shared.basic.dto.AbstractExternalData; +import ch.systemsx.cisd.openbis.generic.shared.basic.dto.DataSetType; +import ch.systemsx.cisd.openbis.generic.shared.basic.dto.Experiment; +import ch.systemsx.cisd.openbis.generic.shared.basic.dto.Project; + +public class ByExpermientPolicy implements IAutoArchiverPolicy +{ + private static final Logger operationLog = + LogFactory.getLogger(LogCategory.OPERATION, ByExpermientPolicy.class); + + private static final String MINIMAL_ARCHIVE_SIZE = "minimal-archive-size"; + + private static final String MAXIMAL_ARCHIVE_SIZE = "maximal-archive-size"; + + private static final long DEFAULT_MINIMAL_ARCHIVE_SIZE = 0; + + private static final long DEFAULT_MAXIMAL_ARCHIVE_SIZE = Long.MAX_VALUE; + + private long minArchiveSize; + + private long maxArchiveSize; + + private IDataSetPathInfoProvider pathInfoProvider; + + public 
ByExpermientPolicy(ExtendedProperties properties) + { + minArchiveSize = + PropertyUtils.getLong(properties, MINIMAL_ARCHIVE_SIZE, DEFAULT_MINIMAL_ARCHIVE_SIZE); + + maxArchiveSize = + PropertyUtils.getLong(properties, MAXIMAL_ARCHIVE_SIZE, DEFAULT_MAXIMAL_ARCHIVE_SIZE); + } + + @Override + @SuppressWarnings("unchecked") + public List<AbstractExternalData> filter(List<AbstractExternalData> dataSets) + { + ProjectGroup prjMap = groupDatasets(dataSets); + + return walkAndFind((Grouper<? extends TreeNode, ? extends TreeNode>) prjMap); + } + + private List<AbstractExternalData> walkAndFind(Grouper<? extends TreeNode, ? extends TreeNode> tree) + { + for (TreeNode node : tree.values()) + { + long size = node.getCumulatedSize(); + if (size >= minArchiveSize) + { + TreeNode kid = node; + if (size <= maxArchiveSize) + { + return reportFind(kid.collectSubTree()); + } + + if (kid instanceof Grouper) + { + @SuppressWarnings("unchecked") + Grouper<TreeNode, TreeNode> subtree = (Grouper<TreeNode, TreeNode>) kid; + List<AbstractExternalData> found = walkAndFind(subtree); + if (found.size() > 0) + { + return found; + } + + // the recursive search of the sub-trees found nothing, yet this branch as a whole is above the maximal size: fall back to a subset of the whole branch + return splitDataset(kid.collectSubTree()); + + } else + { + DatasetListWithTotal bigDataset = (DatasetListWithTotal) kid; + sortBySamples(bigDataset); + return splitDataset(bigDataset); + } + } + } + return new ArrayList<AbstractExternalData>(); + } + + private List<AbstractExternalData> reportFind(List<AbstractExternalData> list) + { + long total = 0; + for (AbstractExternalData ds : list) + { + total += ds.getSize(); + operationLog.info("added ds " + ds.getCode() + " for exp " + ds.getExperiment().getCode() + " with size " + ds.getSize() + " and total " + + total); + } + + return list; + } + + private void sortBySamples(DatasetListWithTotal datasets) + { + Collections.sort(datasets, new Comparator<AbstractExternalData>() + { + + @Override + public int 
compare(AbstractExternalData arg0, AbstractExternalData arg1) + { + String sid1 = arg0.getSampleIdentifier(); + sid1 = sid1 == null ? "" : sid1; + String sid2 = arg1.getSampleIdentifier(); + sid2 = sid2 == null ? "" : sid2; + return sid1.compareTo(sid2); + } + }); + } + + private List<AbstractExternalData> splitDataset(List<AbstractExternalData> datasets) + { + DatasetListWithTotal result = new DatasetListWithTotal(); + + for (AbstractExternalData ds : datasets) + { + if (result.getCumulatedSize() + ds.getSize() > maxArchiveSize) + { + continue; // this data set would push the result over the maximal archive size: skip it and optimistically keep trying to fit the remaining ones + } + + result.add(ds); + } + + return reportFind(result); + } + + private ProjectGroup groupDatasets(List<AbstractExternalData> dataSets) + { + ProjectGroup prjMap = new ProjectGroup(); + + for (AbstractExternalData ds : dataSets) + { + Long size = ds.getSize(); + if (size == null) + { + ISingleDataSetPathInfoProvider dsInfoProvider = getDatasetPathInfoProvider().tryGetSingleDataSetPathInfoProvider(ds.getCode()); + if (dsInfoProvider != null) + { + size = dsInfoProvider.getRootPathInfo().getSizeInBytes(); + ds.setSize(size); + } + } + + if (size != null) + { + Experiment exp = ds.getExperiment(); + if (exp == null) + { + exp = ds.getSample().getExperiment(); + } + + Project prj = exp.getProject(); + DataSetType type = ds.getDataSetType(); + + ExperimentGroup grpMap = prjMap.sureGet(prj); + DataSetTypeGroup expMap = grpMap.sureGet(exp); + DatasetListWithTotal dslist = expMap.sureGet(type); + + dslist.add(ds); + + expMap.addSize(size); + grpMap.addSize(size); + prjMap.addSize(size); + + } else + { + operationLog.warn("Failed determining data set size of " + ds.getCode() + ", cannot include it in archval candidates set."); + } + } + + return prjMap; + } + + private IDataSetPathInfoProvider getDatasetPathInfoProvider() + { + if (pathInfoProvider == null) + { + pathInfoProvider = ServiceProvider.getDataSetPathInfoProvider(); + } + return pathInfoProvider; + } + +} diff --git 
a/datastore_server/source/java/ch/systemsx/cisd/openbis/dss/generic/server/plugins/standard/archiver/grouping/DataSetTypeGroup.java b/datastore_server/source/java/ch/systemsx/cisd/openbis/dss/generic/server/plugins/standard/archiver/grouping/DataSetTypeGroup.java new file mode 100644 index 0000000000000000000000000000000000000000..076dea6563d21bffa48f1133d5cb02af1906f6f3 --- /dev/null +++ b/datastore_server/source/java/ch/systemsx/cisd/openbis/dss/generic/server/plugins/standard/archiver/grouping/DataSetTypeGroup.java @@ -0,0 +1,13 @@ +package ch.systemsx.cisd.openbis.dss.generic.server.plugins.standard.archiver.grouping; + +import ch.systemsx.cisd.openbis.generic.shared.basic.dto.DataSetType; + +public class DataSetTypeGroup extends Grouper<DataSetType, DatasetListWithTotal> +{ + private static final long serialVersionUID = -6048320381482361970L; + + public DataSetTypeGroup() + { + super(DatasetListWithTotal.class); + } +} \ No newline at end of file diff --git a/datastore_server/source/java/ch/systemsx/cisd/openbis/dss/generic/server/plugins/standard/archiver/grouping/DatasetListWithTotal.java b/datastore_server/source/java/ch/systemsx/cisd/openbis/dss/generic/server/plugins/standard/archiver/grouping/DatasetListWithTotal.java new file mode 100644 index 0000000000000000000000000000000000000000..939a6f6e38b8f8bcdd6caea5eb23cd3d7c49c933 --- /dev/null +++ b/datastore_server/source/java/ch/systemsx/cisd/openbis/dss/generic/server/plugins/standard/archiver/grouping/DatasetListWithTotal.java @@ -0,0 +1,38 @@ +package ch.systemsx.cisd.openbis.dss.generic.server.plugins.standard.archiver.grouping; + +import java.util.ArrayList; +import java.util.List; + +import ch.systemsx.cisd.openbis.generic.shared.basic.dto.AbstractExternalData; + +public class DatasetListWithTotal extends ArrayList<AbstractExternalData> implements TreeNode +{ + private static final long serialVersionUID = 7158139354538463051L; + + private long dataSize = 0; + + @Override + public boolean 
add(AbstractExternalData e) + { + addSize(e.getSize()); + return super.add(e); + } + + @Override + public long getCumulatedSize() + { + return dataSize; + } + + @Override + public void addSize(long addon) + { + this.dataSize += addon; + } + + @Override + public List<AbstractExternalData> collectSubTree() + { + return this; + } +} \ No newline at end of file diff --git a/datastore_server/source/java/ch/systemsx/cisd/openbis/dss/generic/server/plugins/standard/archiver/grouping/ExperimentGroup.java b/datastore_server/source/java/ch/systemsx/cisd/openbis/dss/generic/server/plugins/standard/archiver/grouping/ExperimentGroup.java new file mode 100644 index 0000000000000000000000000000000000000000..8d6627f8901fbc124b4891be4979b28f9e06d4a1 --- /dev/null +++ b/datastore_server/source/java/ch/systemsx/cisd/openbis/dss/generic/server/plugins/standard/archiver/grouping/ExperimentGroup.java @@ -0,0 +1,14 @@ +package ch.systemsx.cisd.openbis.dss.generic.server.plugins.standard.archiver.grouping; + +import ch.systemsx.cisd.openbis.generic.shared.basic.dto.Experiment; + +public class ExperimentGroup extends Grouper<Experiment, DataSetTypeGroup> +{ + private static final long serialVersionUID = -5529811835382975484L; + + public ExperimentGroup() + { + super(DataSetTypeGroup.class); + } + +} \ No newline at end of file diff --git a/datastore_server/source/java/ch/systemsx/cisd/openbis/dss/generic/server/plugins/standard/archiver/grouping/Grouper.java b/datastore_server/source/java/ch/systemsx/cisd/openbis/dss/generic/server/plugins/standard/archiver/grouping/Grouper.java new file mode 100644 index 0000000000000000000000000000000000000000..5a146554cb4f58ce16b563e1fb62d3eb01fe1f4c --- /dev/null +++ b/datastore_server/source/java/ch/systemsx/cisd/openbis/dss/generic/server/plugins/standard/archiver/grouping/Grouper.java @@ -0,0 +1,65 @@ +package ch.systemsx.cisd.openbis.dss.generic.server.plugins.standard.archiver.grouping; + +import java.util.ArrayList; +import java.util.HashMap; 
+import java.util.List; + +import ch.systemsx.cisd.openbis.dss.generic.server.plugins.standard.archiver.grouping.TreeNode; +import ch.systemsx.cisd.openbis.generic.shared.basic.dto.AbstractExternalData; + +public class Grouper<TKey, TVal extends TreeNode> extends HashMap<TKey, TVal> implements TreeNode +{ + private static final long serialVersionUID = 7296635809776410306L; + + private long size = 0; + + private Class<?> clz; + + public Grouper(Class<?> clz) + { + this.clz = clz; + } + + @SuppressWarnings("unchecked") + public TVal sureGet(TKey key) + { + TVal result = get(key); + + if (result == null) + { + try + { + put(key, result = (TVal) clz.newInstance()); + } catch (Exception e) + { + throw new RuntimeException(e); + } + } + + return result; + } + + @Override + public long getCumulatedSize() + { + return size; + } + + @Override + public void addSize(long addon) + { + size += addon; + } + + @Override + public List<AbstractExternalData> collectSubTree() + { + ArrayList<AbstractExternalData> result = new ArrayList<AbstractExternalData>(); + for (TVal sub : values()) + { + result.addAll(sub.collectSubTree()); + } + + return result; + } +} \ No newline at end of file diff --git a/datastore_server/source/java/ch/systemsx/cisd/openbis/dss/generic/server/plugins/standard/archiver/grouping/ProjectGroup.java b/datastore_server/source/java/ch/systemsx/cisd/openbis/dss/generic/server/plugins/standard/archiver/grouping/ProjectGroup.java new file mode 100644 index 0000000000000000000000000000000000000000..a6bc2bf92ab98e7e5d23643dd0f13147991e27b3 --- /dev/null +++ b/datastore_server/source/java/ch/systemsx/cisd/openbis/dss/generic/server/plugins/standard/archiver/grouping/ProjectGroup.java @@ -0,0 +1,13 @@ +package ch.systemsx.cisd.openbis.dss.generic.server.plugins.standard.archiver.grouping; + +import ch.systemsx.cisd.openbis.generic.shared.basic.dto.Project; + +public class ProjectGroup extends Grouper<Project, ExperimentGroup> +{ + private static final long 
serialVersionUID = -1655936590485472113L; + + public ProjectGroup() + { + super(ExperimentGroup.class); + } +} \ No newline at end of file diff --git a/datastore_server/source/java/ch/systemsx/cisd/openbis/dss/generic/server/plugins/standard/archiver/grouping/TreeNode.java b/datastore_server/source/java/ch/systemsx/cisd/openbis/dss/generic/server/plugins/standard/archiver/grouping/TreeNode.java new file mode 100644 index 0000000000000000000000000000000000000000..b7d20f1f6115378e7940881983e83c39a2921e93 --- /dev/null +++ b/datastore_server/source/java/ch/systemsx/cisd/openbis/dss/generic/server/plugins/standard/archiver/grouping/TreeNode.java @@ -0,0 +1,14 @@ +package ch.systemsx.cisd.openbis.dss.generic.server.plugins.standard.archiver.grouping; + +import java.util.List; + +import ch.systemsx.cisd.openbis.generic.shared.basic.dto.AbstractExternalData; + +public interface TreeNode +{ + public long getCumulatedSize(); + + public void addSize(long addon); + + public List<AbstractExternalData> collectSubTree(); +} \ No newline at end of file