Skip to content
Snippets Groups Projects
Commit 3f4febfa authored by fedoreno's avatar fedoreno
Browse files

first working ByExperiment archiver

SVN: 32750
parent e6563832
No related branches found
No related tags found
No related merge requests found
...@@ -53,7 +53,7 @@ highwater-mark = 1048576 ...@@ -53,7 +53,7 @@ highwater-mark = 1048576
notify-successful-registration = false notify-successful-registration = false
# The URL of the openBIS server # The URL of the openBIS server
server-url = http://localhost:8888 server-url = http://localhost:8888/
# Time out for accessing openBIS server. Default value is 5 minutes. # Time out for accessing openBIS server. Default value is 5 minutes.
server-timeout-in-minutes = 10 server-timeout-in-minutes = 10
...@@ -81,7 +81,7 @@ cifex-admin-username = ...@@ -81,7 +81,7 @@ cifex-admin-username =
cifex-admin-password = cifex-admin-password =
# The base URL for Web client access. # The base URL for Web client access.
download-url = http://localhost:8889 download-url = http://localhost:8889/
# SMTP properties (must start with 'mail' to be considered). # SMTP properties (must start with 'mail' to be considered).
mail.smtp.host = file://${root-dir}/email mail.smtp.host = file://${root-dir}/email
...@@ -117,7 +117,7 @@ data-set-file-name-entity-separator = _ ...@@ -117,7 +117,7 @@ data-set-file-name-entity-separator = _
# The period of no write access that needs to pass before an incoming data item is considered # The period of no write access that needs to pass before an incoming data item is considered
# complete and ready to be processed (in seconds) [default: 300]. # complete and ready to be processed (in seconds) [default: 300].
# Valid only when auto-detection method is used to determine if an incoming data are ready to be processed. # Valid only when auto-detection method is used to determine if an incoming data are ready to be processed.
quiet-period = 3 quiet-period = 30
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# reporting and processing plugins configuration # reporting and processing plugins configuration
...@@ -385,12 +385,13 @@ hcs-image-overview.label = plugin for HCS_IMAGE ...@@ -385,12 +385,13 @@ hcs-image-overview.label = plugin for HCS_IMAGE
# Archiver class specification (together with the list of packages this class belongs to). # Archiver class specification (together with the list of packages this class belongs to).
#archiver.class = ch.systemsx.cisd.openbis.dss.generic.server.plugins.demo.DemoArchiver #archiver.class = ch.systemsx.cisd.openbis.dss.generic.server.plugins.demo.DemoArchiver
archiver.class = ch.systemsx.cisd.openbis.dss.generic.server.plugins.standard.RsyncArchiver archiver.class = ch.systemsx.cisd.openbis.dss.generic.server.plugins.standard.TarArchiver
archiver.default-archive-folder = /Users/fedoreno/tmp/openbis/tar
# destination of the archive (can be local or remote) # destination of the archive (can be local or remote)
# local: # local:
#archiver.destination = openbis:tmp/dest #archiver.destination = openbis:tmp/dest
# remote: # remote:
archiver.destination = /Users/openbis/dest archiver.destination = /Users/fedoreno/tmp/openbis/dest
# indicates if data should be synchronized when local copy differs from one in archive (default: true) # indicates if data should be synchronized when local copy differs from one in archive (default: true)
archiver.synchronize-archive = true archiver.synchronize-archive = true
archiver.batch-size-in-bytes = 20000000 archiver.batch-size-in-bytes = 20000000
...@@ -416,15 +417,23 @@ auto-archiver.class = ch.systemsx.cisd.etlserver.plugins.AutoArchiverTask ...@@ -416,15 +417,23 @@ auto-archiver.class = ch.systemsx.cisd.etlserver.plugins.AutoArchiverTask
# The time between subsequent archiving runs (in seconds) # The time between subsequent archiving runs (in seconds)
auto-archiver.interval = 10 auto-archiver.interval = 10
# Time of the first execution (HH:mm) # Time of the first execution (HH:mm)
auto-archiver.start = 23:00 # auto-archiver.start = 16:10
# following properties are optional # following properties are optional
# only data sets of specified type will be archived # only data sets of specified type will be archived
#auto-archiver.data-set-type = UNKNOWN #auto-archiver.data-set-type = UNKNOWN
# only data sets that are older than specified number of days will be archived (default = 0) # only data sets that are older than specified number of days will be archived (default = 0)
#auto-archiver.older-than = 90 auto-archiver.older-than = 0
# fully qualified class name of a policy that additionally filters data sets to be archived # fully qualified class name of a policy that additionally filters data sets to be archived
#auto-archiver.policy.class = ch.systemsx.cisd.etlserver.plugins.DummyAutoArchiverPolicy #auto-archiver.policy.class = ch.systemsx.cisd.etlserver.plugins.DummyAutoArchiverPolicy
# use this archiver to archive datasets in batches grouped by experiment
#auto-archiver.archive-candidate-discoverer.class = ch.systemsx.cisd.openbis.dss.generic.server.plugins.standard.archiver.ByExperimentArchiveCandidateDiscoverer
# the min-size in bytes, default is 0
#auto-archiver.archive-candidate-discoverer.minimal-archive-size =
# the max-size in bytes, default is 2^63-1. Set it to accommodate at least a couple of average datasets
#auto-archiver.archive-candidate-discoverer.maximal-archive-size =
# Maintenance task (performed only once) to create paths of existing data sets in pathinfo database # Maintenance task (performed only once) to create paths of existing data sets in pathinfo database
path-info-feeding.class = ch.systemsx.cisd.etlserver.path.PathInfoDatabaseFeedingTask path-info-feeding.class = ch.systemsx.cisd.etlserver.path.PathInfoDatabaseFeedingTask
path-info-feeding.execute-only-once = true path-info-feeding.execute-only-once = true
......
package ch.systemsx.cisd.etlserver; package ch.systemsx.cisd.etlserver;
import java.util.List; import java.util.List;
import java.util.Properties;
import ch.systemsx.cisd.openbis.dss.generic.shared.IEncapsulatedOpenBISService; import ch.systemsx.cisd.openbis.dss.generic.shared.IEncapsulatedOpenBISService;
import ch.systemsx.cisd.openbis.generic.shared.basic.dto.AbstractExternalData; import ch.systemsx.cisd.openbis.generic.shared.basic.dto.AbstractExternalData;
import ch.systemsx.cisd.openbis.generic.shared.basic.dto.ArchiverDataSetCriteria; import ch.systemsx.cisd.openbis.generic.shared.basic.dto.ArchiverDataSetCriteria;
/** /**
* Finds data sets that are possible candidates for archiving * Finds data sets that are possible candidates for archiving. The implementing class must have a constructor accepting a single parameter of type
* {@link java.util.Properties}
* *
* @author Sascha Fedorenko * @author Sascha Fedorenko
*/ */
public interface IArchiveCandidateDiscoverer public interface IArchiveCandidateDiscoverer
{ {
/**
* Initialize the discoverer with specific properties
*
* @param properties
*/
void initialize(Properties properties);
/** /**
* Return a list of data sets that can be scheduled for archiving. This will be called periodically so there's no need to return everything in one * Return a list of data sets that can be scheduled for archiving. This will be called periodically so there's no need to return everything in one
* list. First best subset is sufficient, make sure though that the older data is returned first. * list. First best subset is sufficient, make sure though that the older data is returned first.
......
package ch.systemsx.cisd.etlserver.plugins; package ch.systemsx.cisd.etlserver.plugins;
import java.util.List; import java.util.List;
import java.util.Properties;
import ch.systemsx.cisd.etlserver.IArchiveCandidateDiscoverer; import ch.systemsx.cisd.etlserver.IArchiveCandidateDiscoverer;
import ch.systemsx.cisd.openbis.dss.generic.shared.IEncapsulatedOpenBISService; import ch.systemsx.cisd.openbis.dss.generic.shared.IEncapsulatedOpenBISService;
...@@ -9,22 +8,16 @@ import ch.systemsx.cisd.openbis.generic.shared.basic.dto.AbstractExternalData; ...@@ -9,22 +8,16 @@ import ch.systemsx.cisd.openbis.generic.shared.basic.dto.AbstractExternalData;
import ch.systemsx.cisd.openbis.generic.shared.basic.dto.ArchiverDataSetCriteria; import ch.systemsx.cisd.openbis.generic.shared.basic.dto.ArchiverDataSetCriteria;
/** /**
* Default archive candidate data set discoverer that simply finds all "old" data sets * Default archive candidate data set discoverer that simply finds all old data sets as specified by criteria
* *
* @author fedoreno * @author Sascha Fedorenko
*/ */
public class AgeArchiveCandidateDiscoverer implements IArchiveCandidateDiscoverer public class AgeArchiveCandidateDiscoverer implements IArchiveCandidateDiscoverer
{ {
@Override @Override
public List<AbstractExternalData> findDatasetsForArchiving(IEncapsulatedOpenBISService openBISService, ArchiverDataSetCriteria criteria) public List<AbstractExternalData> findDatasetsForArchiving(IEncapsulatedOpenBISService openBISService, ArchiverDataSetCriteria criteria)
{ {
return openBISService.listAvailableDataSets(criteria); return openBISService.listAvailableDataSets(criteria);
} }
@Override
public void initialize(Properties properties)
{
}
} }
...@@ -107,7 +107,6 @@ public class AutoArchiverTask implements IMaintenanceTask ...@@ -107,7 +107,6 @@ public class AutoArchiverTask implements IMaintenanceTask
PropertyParametersUtil.extractSingleSectionProperties(properties, PropertyParametersUtil.extractSingleSectionProperties(properties,
DISCOVERY_SECTION_NAME, false); DISCOVERY_SECTION_NAME, false);
archiveCandidateDiscoverer = createArchiveDatasetDiscoverer(discoverySectionProperties); archiveCandidateDiscoverer = createArchiveDatasetDiscoverer(discoverySectionProperties);
archiveCandidateDiscoverer.initialize(properties);
removeFromDataStore = removeFromDataStore =
PropertyUtils.getBoolean(properties, REMOVE_DATASETS_FROM_STORE, false); PropertyUtils.getBoolean(properties, REMOVE_DATASETS_FROM_STORE, false);
......
package ch.systemsx.cisd.openbis.dss.generic.server.plugins.standard.archiver; package ch.systemsx.cisd.openbis.dss.generic.server.plugins.standard.archiver;
import java.text.SimpleDateFormat;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collections; import java.util.Collections;
...@@ -9,23 +8,23 @@ import java.util.Date; ...@@ -9,23 +8,23 @@ import java.util.Date;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Properties; import java.util.SortedMap;
import java.util.TreeMap;
import org.apache.commons.lang.time.DateUtils;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import ch.systemsx.cisd.common.logging.LogCategory; import ch.systemsx.cisd.common.logging.LogCategory;
import ch.systemsx.cisd.common.logging.LogFactory; import ch.systemsx.cisd.common.logging.LogFactory;
import ch.systemsx.cisd.common.properties.ExtendedProperties;
import ch.systemsx.cisd.common.properties.PropertyUtils; import ch.systemsx.cisd.common.properties.PropertyUtils;
import ch.systemsx.cisd.etlserver.IArchiveCandidateDiscoverer; import ch.systemsx.cisd.etlserver.IArchiveCandidateDiscoverer;
import ch.systemsx.cisd.openbis.dss.generic.shared.IDataSetPathInfoProvider;
import ch.systemsx.cisd.openbis.dss.generic.shared.IEncapsulatedOpenBISService; import ch.systemsx.cisd.openbis.dss.generic.shared.IEncapsulatedOpenBISService;
import ch.systemsx.cisd.openbis.generic.shared.api.v1.dto.SearchCriteria; import ch.systemsx.cisd.openbis.dss.generic.shared.ISingleDataSetPathInfoProvider;
import ch.systemsx.cisd.openbis.generic.shared.api.v1.dto.SearchCriteria.CompareMode; import ch.systemsx.cisd.openbis.dss.generic.shared.ServiceProvider;
import ch.systemsx.cisd.openbis.generic.shared.api.v1.dto.SearchCriteria.MatchClause;
import ch.systemsx.cisd.openbis.generic.shared.api.v1.dto.SearchCriteria.MatchClauseAttribute;
import ch.systemsx.cisd.openbis.generic.shared.api.v1.dto.SearchCriteria.MatchClauseTimeAttribute;
import ch.systemsx.cisd.openbis.generic.shared.basic.dto.AbstractExternalData; import ch.systemsx.cisd.openbis.generic.shared.basic.dto.AbstractExternalData;
import ch.systemsx.cisd.openbis.generic.shared.basic.dto.ArchiverDataSetCriteria; import ch.systemsx.cisd.openbis.generic.shared.basic.dto.ArchiverDataSetCriteria;
import ch.systemsx.cisd.openbis.generic.shared.basic.dto.Experiment;
import ch.systemsx.cisd.openbis.generic.shared.basic.dto.Project; import ch.systemsx.cisd.openbis.generic.shared.basic.dto.Project;
/** /**
...@@ -42,13 +41,15 @@ public class ByExperimentArchiveCandidateDiscoverer implements IArchiveCandidate ...@@ -42,13 +41,15 @@ public class ByExperimentArchiveCandidateDiscoverer implements IArchiveCandidate
private static final String MAXIMAL_ARCHIVE_SIZE = "maximal-archive-size"; private static final String MAXIMAL_ARCHIVE_SIZE = "maximal-archive-size";
private static final int DEFAULT_MINIMAL_ARCHIVE_SIZE = 0; private static final long DEFAULT_MINIMAL_ARCHIVE_SIZE = 0;
private static final int DEFAULT_MAXIMAL_ARCHIVE_SIZE = Integer.MAX_VALUE; private static final long DEFAULT_MAXIMAL_ARCHIVE_SIZE = Long.MAX_VALUE;
private int minArchiveSize; private long minArchiveSize;
private int maxArchiveSize; private long maxArchiveSize;
private IDataSetPathInfoProvider pathInfoProvider;
private static class DatasetArchInfo implements Comparable<DatasetArchInfo> private static class DatasetArchInfo implements Comparable<DatasetArchInfo>
{ {
...@@ -65,65 +66,148 @@ public class ByExperimentArchiveCandidateDiscoverer implements IArchiveCandidate ...@@ -65,65 +66,148 @@ public class ByExperimentArchiveCandidateDiscoverer implements IArchiveCandidate
} }
} }
public ByExperimentArchiveCandidateDiscoverer(ExtendedProperties properties)
{
minArchiveSize =
PropertyUtils.getLong(properties, MINIMAL_ARCHIVE_SIZE, DEFAULT_MINIMAL_ARCHIVE_SIZE);
maxArchiveSize =
PropertyUtils.getLong(properties, MAXIMAL_ARCHIVE_SIZE, DEFAULT_MAXIMAL_ARCHIVE_SIZE);
}
@Override @Override
public List<AbstractExternalData> findDatasetsForArchiving(IEncapsulatedOpenBISService openbis, ArchiverDataSetCriteria criteria) public List<AbstractExternalData> findDatasetsForArchiving(IEncapsulatedOpenBISService openbis, ArchiverDataSetCriteria criteria)
{ {
SearchCriteria sc = new SearchCriteria(); List<AbstractExternalData> dataSets = openbis.listAvailableDataSets(criteria);
sc.addMatchClause(MatchClause.createAttributeMatch(MatchClauseAttribute.TYPE, criteria.tryGetDataSetTypeCode()));
DatasetArchInfo[] sortedCandidates = organizeCandidates(dataSets);
SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd"); if (sortedCandidates.length == 0)
String dateBefore = dateFormat.format(DateUtils.addDays(new Date(), -criteria.getOlderThan())); {
sc.addMatchClause(MatchClause.createTimeAttributeMatch(MatchClauseTimeAttribute.MODIFICATION_DATE, CompareMode.LESS_THAN_OR_EQUAL, return new ArrayList<AbstractExternalData>(0);
dateBefore, "0")); }
// TODO: not yet archived SortedMap<Project, DatasetArchInfo> byProject = groupByProject(sortedCandidates);
// sc.addMatchClause(MatchClause.createAttributeMatch(MatchClauseAttribute., desiredValue));
List<AbstractExternalData> dataSets = openbis.searchForDataSets(sc); boolean hadGoodCandidates = false;
for (Project p : byProject.keySet())
{
DatasetArchInfo projectSets = byProject.get(p);
if (projectSets.totalSize > minArchiveSize)
{
hadGoodCandidates = true;
if (projectSets.totalSize < maxArchiveSize)
{
return reportFind(projectSets.datasets);
}
List<AbstractExternalData> projectSubset = selectSuitableSubsetBySample(projectSets.datasets);
if (projectSubset.size() > 0)
{
return reportFind(projectSubset);
}
}
}
Map<Project, DatasetArchInfo> candidates = new HashMap<Project, DatasetArchInfo>(); if (hadGoodCandidates)
{
operationLog.info("Found datasets matching By Experiment archivation policy, but no subset fit within "
+ "MINIMAL_ARCHIVE_SIZE and MAXIMAL_ARCHIVE_SIZE criteria.");
}
return new ArrayList<AbstractExternalData>();
}
private List<AbstractExternalData> reportFind(List<AbstractExternalData> datasets)
{
for (AbstractExternalData ds : datasets)
{
operationLog.info("Will archive " + ds.getCode() + " with experiment " + ds.getExperiment().getCode());
}
return datasets;
}
private SortedMap<Project, DatasetArchInfo> groupByProject(DatasetArchInfo[] sortedCandidates)
{
SortedMap<Project, DatasetArchInfo> result = new TreeMap<Project, DatasetArchInfo>();
for (DatasetArchInfo info : sortedCandidates)
{
Project project = info.datasets.get(0).getExperiment().getProject();
DatasetArchInfo current = result.get(project);
if (current == null)
{
current = new DatasetArchInfo();
}
current.datasets.addAll(info.datasets);
current.totalSize += info.totalSize;
result.put(project, current);
}
return result;
}
private DatasetArchInfo[] organizeCandidates(List<AbstractExternalData> dataSets)
{
Map<Experiment, DatasetArchInfo> candidates = new HashMap<Experiment, DatasetArchInfo>();
for (AbstractExternalData ds : dataSets) for (AbstractExternalData ds : dataSets)
{ {
Project project = ds.getExperiment().getProject(); Experiment experiment = ds.getExperiment();
DatasetArchInfo candidate = candidates.get(project); DatasetArchInfo candidate = candidates.get(experiment);
if (candidate == null) if (candidate == null)
{ {
candidate = new DatasetArchInfo(); candidate = new DatasetArchInfo();
} }
candidate.datasets.add(ds); candidate.datasets.add(ds);
candidate.totalSize += ds.getSize();
if (candidate.minDate.compareTo(ds.getModificationDate()) > 0) Long size = ds.getSize();
if (size == null)
{ {
candidate.minDate = ds.getModificationDate(); ISingleDataSetPathInfoProvider dsInfoProvider = getDatasetPathInfoProvider().tryGetSingleDataSetPathInfoProvider(ds.getCode());
if (dsInfoProvider != null)
{
size = dsInfoProvider.getRootPathInfo().getSizeInBytes();
ds.setSize(size);
}
} }
candidates.put(project, candidate); if (size != null)
}
DatasetArchInfo[] sortedCandidates = candidates.values().toArray(new DatasetArchInfo[candidates.size()]);
Arrays.sort(sortedCandidates);
for (DatasetArchInfo ai : sortedCandidates)
{
if (ai.totalSize > minArchiveSize)
{ {
if (ai.totalSize < maxArchiveSize) candidate.totalSize += size;
if (candidate.minDate.compareTo(ds.getModificationDate()) > 0)
{ {
return ai.datasets; candidate.minDate = ds.getModificationDate();
} }
return selectSuitableSubset(ai.datasets); candidates.put(experiment, candidate);
} else
{
operationLog.warn("Failed determining data set size of " + ds.getCode() + ", cannot include it in archval candidates set.");
} }
} }
operationLog.info("No dataset collection matches By Experiment archivation policy."); if (candidates.size() == 0)
{
return new DatasetArchInfo[0];
}
return new ArrayList<AbstractExternalData>();
DatasetArchInfo[] sortedCandidates = candidates.values().toArray(new DatasetArchInfo[candidates.size()]);
Arrays.sort(sortedCandidates);
return sortedCandidates;
} }
private List<AbstractExternalData> selectSuitableSubset(List<AbstractExternalData> datasets) private IDataSetPathInfoProvider getDatasetPathInfoProvider()
{
if (pathInfoProvider == null)
{
pathInfoProvider = ServiceProvider.getDataSetPathInfoProvider();
}
return pathInfoProvider;
}
private List<AbstractExternalData> selectSuitableSubsetBySample(List<AbstractExternalData> datasets)
{ {
ArrayList<AbstractExternalData> result = new ArrayList<AbstractExternalData>(); ArrayList<AbstractExternalData> result = new ArrayList<AbstractExternalData>();
...@@ -144,7 +228,7 @@ public class ByExperimentArchiveCandidateDiscoverer implements IArchiveCandidate ...@@ -144,7 +228,7 @@ public class ByExperimentArchiveCandidateDiscoverer implements IArchiveCandidate
long curSize = 0; long curSize = 0;
for (AbstractExternalData ds : datasets) for (AbstractExternalData ds : datasets)
{ {
if (curSize + ds.getSize() > maxArchiveSize) if (curSize + ds.getSize() > maxArchiveSize && curSize > minArchiveSize)
{ {
return result; return result;
} }
...@@ -152,21 +236,11 @@ public class ByExperimentArchiveCandidateDiscoverer implements IArchiveCandidate ...@@ -152,21 +236,11 @@ public class ByExperimentArchiveCandidateDiscoverer implements IArchiveCandidate
curSize += ds.getSize(); curSize += ds.getSize();
} }
operationLog.warn("Found datasets matching By Experiment archivation policy " if (curSize < minArchiveSize)
+ "but wasn't able to pick a subset for MAXIMAL_ARCHIVE_SIZE criteria."); {
return new ArrayList<AbstractExternalData>();
}
return result; return result;
} }
@Override
public void initialize(Properties properties)
{
minArchiveSize =
PropertyUtils.getInt(properties, MINIMAL_ARCHIVE_SIZE, DEFAULT_MINIMAL_ARCHIVE_SIZE);
maxArchiveSize =
PropertyUtils.getInt(properties, MAXIMAL_ARCHIVE_SIZE, DEFAULT_MAXIMAL_ARCHIVE_SIZE);
}
} }
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment