diff --git a/datastore_server/etc/service.properties b/datastore_server/etc/service.properties index 9b3085f4aa32dfeb67b0bc3e87c84c62f518e18d..5842da0a1d304d113d840d5cda8d31b480b67228 100644 --- a/datastore_server/etc/service.properties +++ b/datastore_server/etc/service.properties @@ -53,7 +53,7 @@ highwater-mark = 1048576 notify-successful-registration = false # The URL of the openBIS server -server-url = http://localhost:8888 +server-url = http://localhost:8888/ # Time out for accessing openBIS server. Default value is 5 minutes. server-timeout-in-minutes = 10 @@ -81,7 +81,7 @@ cifex-admin-username = cifex-admin-password = # The base URL for Web client access. -download-url = http://localhost:8889 +download-url = http://localhost:8889/ # SMTP properties (must start with 'mail' to be considered). mail.smtp.host = file://${root-dir}/email @@ -117,7 +117,7 @@ data-set-file-name-entity-separator = _ # The period of no write access that needs to pass before an incoming data item is considered # complete and ready to be processed (in seconds) [default: 300]. # Valid only when auto-detection method is used to determine if an incoming data are ready to be processed. -quiet-period = 3 +quiet-period = 30 # --------------------------------------------------------------------------- # reporting and processing plugins configuration @@ -385,12 +385,13 @@ hcs-image-overview.label = plugin for HCS_IMAGE # Archiver class specification (together with the list of packages this class belongs to). 
#archiver.class = ch.systemsx.cisd.openbis.dss.generic.server.plugins.demo.DemoArchiver -archiver.class = ch.systemsx.cisd.openbis.dss.generic.server.plugins.standard.RsyncArchiver +archiver.class = ch.systemsx.cisd.openbis.dss.generic.server.plugins.standard.TarArchiver +archiver.default-archive-folder = /Users/fedoreno/tmp/openbis/tar # dectination of the archive (can be local or remote) # local: #archiver.destination = openbis:tmp/dest # remote: -archiver.destination = /Users/openbis/dest +archiver.destination = /Users/fedoreno/tmp/openbis/dest # indicates if data should be synchronized when local copy differs from one in archive (default: true) archiver.synchronize-archive = true archiver.batch-size-in-bytes = 20000000 @@ -416,15 +417,23 @@ auto-archiver.class = ch.systemsx.cisd.etlserver.plugins.AutoArchiverTask # The time between subsequent archivizations (in seconds) auto-archiver.interval = 10 # Time of the first execution (HH:mm) -auto-archiver.start = 23:00 +# auto-archiver.start = 16:10 # following properties are optional # only data sets of specified type will be archived #auto-archiver.data-set-type = UNKNOWN # only data sets that are older than specified number of days will be archived (default = 0) -#auto-archiver.older-than = 90 +auto-archiver.older-than = 0 # fully qualified class name of a policy that additionally filters data sets to be filtered #auto-archiver.policy.class = ch.systemsx.cisd.etlserver.plugins.DummyAutoArchiverPolicy +# use this archiver to archive datasets in batches grouped by experiment +#auto-archiver.archive-candidate-discoverer.class = ch.systemsx.cisd.openbis.dss.generic.server.plugins.standard.archiver.ByExperimentArchiveCandidateDiscoverer + +# the min-size in bytes, default is 0 +#auto-archiver.archive-candidate-discoverer.minimal-archive-size = +# the max-size in bytes, default is 2^63-1. 
Set it to accommodate at least a couple of average datasets +#auto-archiver.archive-candidate-discoverer.maximal-archive-size = + # Maintenance task (performed only once) to create paths of existing data sets in pathinfo database path-info-feeding.class = ch.systemsx.cisd.etlserver.path.PathInfoDatabaseFeedingTask path-info-feeding.execute-only-once = true diff --git a/datastore_server/source/java/ch/systemsx/cisd/etlserver/IArchiveCandidateDiscoverer.java b/datastore_server/source/java/ch/systemsx/cisd/etlserver/IArchiveCandidateDiscoverer.java index 0625d66bdbc2a101d20404955f468d77edfd227f..a63dc9fb357c8a55fb206db9d3df3b138d7bd6b7 100644 --- a/datastore_server/source/java/ch/systemsx/cisd/etlserver/IArchiveCandidateDiscoverer.java +++ b/datastore_server/source/java/ch/systemsx/cisd/etlserver/IArchiveCandidateDiscoverer.java @@ -1,26 +1,19 @@ package ch.systemsx.cisd.etlserver; import java.util.List; -import java.util.Properties; import ch.systemsx.cisd.openbis.dss.generic.shared.IEncapsulatedOpenBISService; import ch.systemsx.cisd.openbis.generic.shared.basic.dto.AbstractExternalData; import ch.systemsx.cisd.openbis.generic.shared.basic.dto.ArchiverDataSetCriteria; /** - * Finds data sets that are possible candidates for archiving + * Finds data sets that are possible candidates for archiving. The implementing class must have a constructor accepting a single parameter of type + * {@link java.util.Properties} * * @author Sascha Fedorenko */ public interface IArchiveCandidateDiscoverer { - /** - * Initialize the discoverer with specific properties - * - * @param properties - */ - void initialize(Properties properties); - /** * Return a list of data sets that can be scheduled for archiving. This will be called periodically so there's no need to return everything in one * list. First best subset is sufficient, make sure though that the older data is returned first. 
diff --git a/datastore_server/source/java/ch/systemsx/cisd/etlserver/plugins/AgeArchiveCandidateDiscoverer.java b/datastore_server/source/java/ch/systemsx/cisd/etlserver/plugins/AgeArchiveCandidateDiscoverer.java index e2df98a7ced3d91e82b7da9d0e6268a02f95ebad..ff16358e0f33849bca850ee975d86a58b63d71d3 100644 --- a/datastore_server/source/java/ch/systemsx/cisd/etlserver/plugins/AgeArchiveCandidateDiscoverer.java +++ b/datastore_server/source/java/ch/systemsx/cisd/etlserver/plugins/AgeArchiveCandidateDiscoverer.java @@ -1,7 +1,6 @@ package ch.systemsx.cisd.etlserver.plugins; import java.util.List; -import java.util.Properties; import ch.systemsx.cisd.etlserver.IArchiveCandidateDiscoverer; import ch.systemsx.cisd.openbis.dss.generic.shared.IEncapsulatedOpenBISService; @@ -9,22 +8,16 @@ import ch.systemsx.cisd.openbis.generic.shared.basic.dto.AbstractExternalData; import ch.systemsx.cisd.openbis.generic.shared.basic.dto.ArchiverDataSetCriteria; /** - * Default archive candidate data set discoverer that simply finds all "old" data sets + * Default archive candidate data set discoverer that simply finds all old data sets as specified by criteria * - * @author fedoreno + * @author Sascha Fedorenko */ public class AgeArchiveCandidateDiscoverer implements IArchiveCandidateDiscoverer { - @Override public List<AbstractExternalData> findDatasetsForArchiving(IEncapsulatedOpenBISService openBISService, ArchiverDataSetCriteria criteria) { return openBISService.listAvailableDataSets(criteria); } - @Override - public void initialize(Properties properties) - { - } - } diff --git a/datastore_server/source/java/ch/systemsx/cisd/etlserver/plugins/AutoArchiverTask.java b/datastore_server/source/java/ch/systemsx/cisd/etlserver/plugins/AutoArchiverTask.java index f841831e8dffbcff852950d7288bfd2f59ce7149..56d75cbceba36c6e08ee2633b3c5515866b16dce 100644 --- a/datastore_server/source/java/ch/systemsx/cisd/etlserver/plugins/AutoArchiverTask.java +++ 
b/datastore_server/source/java/ch/systemsx/cisd/etlserver/plugins/AutoArchiverTask.java @@ -107,7 +107,6 @@ public class AutoArchiverTask implements IMaintenanceTask PropertyParametersUtil.extractSingleSectionProperties(properties, DISCOVERY_SECTION_NAME, false); archiveCandidateDiscoverer = createArchiveDatasetDiscoverer(discoverySectionProperties); - archiveCandidateDiscoverer.initialize(properties); removeFromDataStore = PropertyUtils.getBoolean(properties, REMOVE_DATASETS_FROM_STORE, false); diff --git a/datastore_server/source/java/ch/systemsx/cisd/openbis/dss/generic/server/plugins/standard/archiver/ByExperimentArchiveCandidateDiscoverer.java b/datastore_server/source/java/ch/systemsx/cisd/openbis/dss/generic/server/plugins/standard/archiver/ByExperimentArchiveCandidateDiscoverer.java index 0b3b3fec289611109476ee2f5b184b8ec9be9788..560c1c1b5ca94839ff2037e129a8586531d3961f 100644 --- a/datastore_server/source/java/ch/systemsx/cisd/openbis/dss/generic/server/plugins/standard/archiver/ByExperimentArchiveCandidateDiscoverer.java +++ b/datastore_server/source/java/ch/systemsx/cisd/openbis/dss/generic/server/plugins/standard/archiver/ByExperimentArchiveCandidateDiscoverer.java @@ -1,6 +1,5 @@ package ch.systemsx.cisd.openbis.dss.generic.server.plugins.standard.archiver; -import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -9,23 +8,23 @@ import java.util.Date; import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.Properties; +import java.util.SortedMap; +import java.util.TreeMap; -import org.apache.commons.lang.time.DateUtils; import org.apache.log4j.Logger; import ch.systemsx.cisd.common.logging.LogCategory; import ch.systemsx.cisd.common.logging.LogFactory; +import ch.systemsx.cisd.common.properties.ExtendedProperties; import ch.systemsx.cisd.common.properties.PropertyUtils; import ch.systemsx.cisd.etlserver.IArchiveCandidateDiscoverer; +import 
ch.systemsx.cisd.openbis.dss.generic.shared.IDataSetPathInfoProvider; import ch.systemsx.cisd.openbis.dss.generic.shared.IEncapsulatedOpenBISService; -import ch.systemsx.cisd.openbis.generic.shared.api.v1.dto.SearchCriteria; -import ch.systemsx.cisd.openbis.generic.shared.api.v1.dto.SearchCriteria.CompareMode; -import ch.systemsx.cisd.openbis.generic.shared.api.v1.dto.SearchCriteria.MatchClause; -import ch.systemsx.cisd.openbis.generic.shared.api.v1.dto.SearchCriteria.MatchClauseAttribute; -import ch.systemsx.cisd.openbis.generic.shared.api.v1.dto.SearchCriteria.MatchClauseTimeAttribute; +import ch.systemsx.cisd.openbis.dss.generic.shared.ISingleDataSetPathInfoProvider; +import ch.systemsx.cisd.openbis.dss.generic.shared.ServiceProvider; import ch.systemsx.cisd.openbis.generic.shared.basic.dto.AbstractExternalData; import ch.systemsx.cisd.openbis.generic.shared.basic.dto.ArchiverDataSetCriteria; +import ch.systemsx.cisd.openbis.generic.shared.basic.dto.Experiment; import ch.systemsx.cisd.openbis.generic.shared.basic.dto.Project; /** @@ -42,13 +41,15 @@ public class ByExperimentArchiveCandidateDiscoverer implements IArchiveCandidate private static final String MAXIMAL_ARCHIVE_SIZE = "maximal-archive-size"; - private static final int DEFAULT_MINIMAL_ARCHIVE_SIZE = 0; + private static final long DEFAULT_MINIMAL_ARCHIVE_SIZE = 0; - private static final int DEFAULT_MAXIMAL_ARCHIVE_SIZE = Integer.MAX_VALUE; + private static final long DEFAULT_MAXIMAL_ARCHIVE_SIZE = Long.MAX_VALUE; - private int minArchiveSize; + private long minArchiveSize; - private int maxArchiveSize; + private long maxArchiveSize; + + private IDataSetPathInfoProvider pathInfoProvider; private static class DatasetArchInfo implements Comparable<DatasetArchInfo> { @@ -65,65 +66,148 @@ public class ByExperimentArchiveCandidateDiscoverer implements IArchiveCandidate } } + public ByExperimentArchiveCandidateDiscoverer(ExtendedProperties properties) + { + minArchiveSize = + PropertyUtils.getLong(properties, 
MINIMAL_ARCHIVE_SIZE, DEFAULT_MINIMAL_ARCHIVE_SIZE); + + maxArchiveSize = + PropertyUtils.getLong(properties, MAXIMAL_ARCHIVE_SIZE, DEFAULT_MAXIMAL_ARCHIVE_SIZE); + } + @Override public List<AbstractExternalData> findDatasetsForArchiving(IEncapsulatedOpenBISService openbis, ArchiverDataSetCriteria criteria) { - SearchCriteria sc = new SearchCriteria(); - sc.addMatchClause(MatchClause.createAttributeMatch(MatchClauseAttribute.TYPE, criteria.tryGetDataSetTypeCode())); + List<AbstractExternalData> dataSets = openbis.listAvailableDataSets(criteria); + + DatasetArchInfo[] sortedCandidates = organizeCandidates(dataSets); - SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd"); - String dateBefore = dateFormat.format(DateUtils.addDays(new Date(), -criteria.getOlderThan())); - sc.addMatchClause(MatchClause.createTimeAttributeMatch(MatchClauseTimeAttribute.MODIFICATION_DATE, CompareMode.LESS_THAN_OR_EQUAL, - dateBefore, "0")); + if (sortedCandidates.length == 0) + { + return new ArrayList<AbstractExternalData>(0); + } - // TODO: not yet archived - // sc.addMatchClause(MatchClause.createAttributeMatch(MatchClauseAttribute., desiredValue)); + SortedMap<Project, DatasetArchInfo> byProject = groupByProject(sortedCandidates); - List<AbstractExternalData> dataSets = openbis.searchForDataSets(sc); + boolean hadGoodCandidates = false; + for (Project p : byProject.keySet()) + { + DatasetArchInfo projectSets = byProject.get(p); + if (projectSets.totalSize > minArchiveSize) + { + hadGoodCandidates = true; + if (projectSets.totalSize < maxArchiveSize) + { + return reportFind(projectSets.datasets); + } + + List<AbstractExternalData> projectSubset = selectSuitableSubsetBySample(projectSets.datasets); + if (projectSubset.size() > 0) + { + return reportFind(projectSubset); + } + } + } - Map<Project, DatasetArchInfo> candidates = new HashMap<Project, DatasetArchInfo>(); + if (hadGoodCandidates) + { + operationLog.info("Found datasets matching By Experiment archivation policy, but 
no subset fit within " + + "MINIMAL_ARCHIVE_SIZE and MAXIMAL_ARCHIVE_SIZE criteria."); + } + + return new ArrayList<AbstractExternalData>(); + } + + private List<AbstractExternalData> reportFind(List<AbstractExternalData> datasets) + { + for (AbstractExternalData ds : datasets) + { + operationLog.info("Will archive " + ds.getCode() + " with experiment " + ds.getExperiment().getCode()); + } + return datasets; + } + + private SortedMap<Project, DatasetArchInfo> groupByProject(DatasetArchInfo[] sortedCandidates) + { + SortedMap<Project, DatasetArchInfo> result = new TreeMap<Project, DatasetArchInfo>(); + for (DatasetArchInfo info : sortedCandidates) + { + Project project = info.datasets.get(0).getExperiment().getProject(); + DatasetArchInfo current = result.get(project); + if (current == null) + { + current = new DatasetArchInfo(); + } + + current.datasets.addAll(info.datasets); + current.totalSize += info.totalSize; + result.put(project, current); + } + return result; + } + + private DatasetArchInfo[] organizeCandidates(List<AbstractExternalData> dataSets) + { + Map<Experiment, DatasetArchInfo> candidates = new HashMap<Experiment, DatasetArchInfo>(); for (AbstractExternalData ds : dataSets) { - Project project = ds.getExperiment().getProject(); - DatasetArchInfo candidate = candidates.get(project); + Experiment experiment = ds.getExperiment(); + DatasetArchInfo candidate = candidates.get(experiment); if (candidate == null) { candidate = new DatasetArchInfo(); } candidate.datasets.add(ds); - candidate.totalSize += ds.getSize(); - if (candidate.minDate.compareTo(ds.getModificationDate()) > 0) + + Long size = ds.getSize(); + if (size == null) { - candidate.minDate = ds.getModificationDate(); + ISingleDataSetPathInfoProvider dsInfoProvider = getDatasetPathInfoProvider().tryGetSingleDataSetPathInfoProvider(ds.getCode()); + if (dsInfoProvider != null) + { + size = dsInfoProvider.getRootPathInfo().getSizeInBytes(); + ds.setSize(size); + } } - candidates.put(project, 
candidate); - } - - DatasetArchInfo[] sortedCandidates = candidates.values().toArray(new DatasetArchInfo[candidates.size()]); - Arrays.sort(sortedCandidates); - - for (DatasetArchInfo ai : sortedCandidates) - { - if (ai.totalSize > minArchiveSize) + if (size != null) { - if (ai.totalSize < maxArchiveSize) + candidate.totalSize += size; + if (candidate.minDate.compareTo(ds.getModificationDate()) > 0) { - return ai.datasets; + candidate.minDate = ds.getModificationDate(); } - return selectSuitableSubset(ai.datasets); + candidates.put(experiment, candidate); + } else + { + operationLog.warn("Failed determining data set size of " + ds.getCode() + ", cannot include it in archval candidates set."); } } - operationLog.info("No dataset collection matches By Experiment archivation policy."); + if (candidates.size() == 0) + { + return new DatasetArchInfo[0]; + } - return new ArrayList<AbstractExternalData>(); + + DatasetArchInfo[] sortedCandidates = candidates.values().toArray(new DatasetArchInfo[candidates.size()]); + Arrays.sort(sortedCandidates); + return sortedCandidates; } - private List<AbstractExternalData> selectSuitableSubset(List<AbstractExternalData> datasets) + private IDataSetPathInfoProvider getDatasetPathInfoProvider() + { + if (pathInfoProvider == null) + { + pathInfoProvider = ServiceProvider.getDataSetPathInfoProvider(); + } + return pathInfoProvider; + } + + private List<AbstractExternalData> selectSuitableSubsetBySample(List<AbstractExternalData> datasets) { ArrayList<AbstractExternalData> result = new ArrayList<AbstractExternalData>(); @@ -144,7 +228,7 @@ public class ByExperimentArchiveCandidateDiscoverer implements IArchiveCandidate long curSize = 0; for (AbstractExternalData ds : datasets) { - if (curSize + ds.getSize() > maxArchiveSize) + if (curSize + ds.getSize() > maxArchiveSize && curSize > minArchiveSize) { return result; } @@ -152,21 +236,11 @@ public class ByExperimentArchiveCandidateDiscoverer implements IArchiveCandidate curSize += 
ds.getSize(); } - operationLog.warn("Found datasets matching By Experiment archivation policy " - + "but wasn't able to pick a subset for MAXIMAL_ARCHIVE_SIZE criteria."); + if (curSize < minArchiveSize) + { + return new ArrayList<AbstractExternalData>(); + } return result; } - - @Override - public void initialize(Properties properties) - { - minArchiveSize = - PropertyUtils.getInt(properties, MINIMAL_ARCHIVE_SIZE, DEFAULT_MINIMAL_ARCHIVE_SIZE); - - maxArchiveSize = - PropertyUtils.getInt(properties, MAXIMAL_ARCHIVE_SIZE, DEFAULT_MAXIMAL_ARCHIVE_SIZE); - - } - }