Commit 3f4febfa authored by fedoreno
first working ByExperiment archiver

SVN: 32750
parent e6563832
service.properties
@@ -53,7 +53,7 @@ highwater-mark = 1048576
notify-successful-registration = false
# The URL of the openBIS server
server-url = http://localhost:8888
server-url = http://localhost:8888/
# Time out for accessing openBIS server. Default value is 5 minutes.
server-timeout-in-minutes = 10
@@ -81,7 +81,7 @@ cifex-admin-username =
cifex-admin-password =
# The base URL for Web client access.
download-url = http://localhost:8889
download-url = http://localhost:8889/
# SMTP properties (must start with 'mail' to be considered).
mail.smtp.host = file://${root-dir}/email
@@ -117,7 +117,7 @@ data-set-file-name-entity-separator = _
# The period of no write access that needs to pass before an incoming data item is considered
# complete and ready to be processed (in seconds) [default: 300].
# Valid only when the auto-detection method is used to determine if incoming data are ready to be processed (see the sketch after this hunk).
quiet-period = 3
quiet-period = 30
# ---------------------------------------------------------------------------
# reporting and processing plugins configuration
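
To make the quiet-period rule above concrete: a minimal, hypothetical sketch of the auto-detection idea the comment describes — an incoming item counts as complete once no write has touched it for the configured number of seconds. The class and method names here are illustrative, not openBIS API.

import java.io.File;

// Illustrative only, not openBIS code. Models the quiet-period rule from the
// service.properties comment above (default 300 s, set to 30 s in this commit).
public class QuietPeriodCheck
{
    private final long quietPeriodMillis;

    public QuietPeriodCheck(int quietPeriodSeconds)
    {
        this.quietPeriodMillis = quietPeriodSeconds * 1000L;
    }

    /** True if the incoming item has seen no writes for the whole quiet period. */
    public boolean isComplete(File incomingItem)
    {
        long sinceLastWrite = System.currentTimeMillis() - incomingItem.lastModified();
        return sinceLastWrite >= quietPeriodMillis;
    }
}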
@@ -385,12 +385,13 @@ hcs-image-overview.label = plugin for HCS_IMAGE
# Archiver class specification (together with the list of packages this class belongs to).
#archiver.class = ch.systemsx.cisd.openbis.dss.generic.server.plugins.demo.DemoArchiver
archiver.class = ch.systemsx.cisd.openbis.dss.generic.server.plugins.standard.RsyncArchiver
archiver.class = ch.systemsx.cisd.openbis.dss.generic.server.plugins.standard.TarArchiver
archiver.default-archive-folder = /Users/fedoreno/tmp/openbis/tar
# destination of the archive (can be local or remote)
# local:
#archiver.destination = openbis:tmp/dest
# remote:
archiver.destination = /Users/openbis/dest
archiver.destination = /Users/fedoreno/tmp/openbis/dest
# indicates if data should be synchronized when local copy differs from one in archive (default: true)
archiver.synchronize-archive = true
archiver.batch-size-in-bytes = 20000000
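
A hedged illustration of what archiver.batch-size-in-bytes implies: data sets are grouped into archive batches whose accumulated size stays at or under the configured limit (20 MB above). This is a sketch of the batching idea only, not the actual RsyncArchiver/TarArchiver implementation; all names are made up.

import java.util.ArrayList;
import java.util.List;

// Sketch only: groups data set sizes (assumed non-null, in bytes) into batches
// bounded by batch-size-in-bytes. An oversized single data set still gets its
// own batch rather than being dropped.
public class BatchSplitter
{
    public static List<List<Long>> split(List<Long> dataSetSizes, long batchSizeInBytes)
    {
        List<List<Long>> batches = new ArrayList<List<Long>>();
        List<Long> currentBatch = new ArrayList<Long>();
        long currentSize = 0;
        for (Long size : dataSetSizes)
        {
            if (currentSize + size > batchSizeInBytes && !currentBatch.isEmpty())
            {
                batches.add(currentBatch); // current batch is full, start a new one
                currentBatch = new ArrayList<Long>();
                currentSize = 0;
            }
            currentBatch.add(size);
            currentSize += size;
        }
        if (!currentBatch.isEmpty())
        {
            batches.add(currentBatch);
        }
        return batches;
    }
}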
@@ -416,15 +417,23 @@ auto-archiver.class = ch.systemsx.cisd.etlserver.plugins.AutoArchiverTask
# The time between subsequent archiving runs (in seconds)
auto-archiver.interval = 10
# Time of the first execution (HH:mm)
auto-archiver.start = 23:00
# auto-archiver.start = 16:10
# following properties are optional
# only data sets of specified type will be archived
#auto-archiver.data-set-type = UNKNOWN
# only data sets that are older than specified number of days will be archived (default = 0)
#auto-archiver.older-than = 90
auto-archiver.older-than = 0
# fully qualified class name of a policy that additionally filters the data sets to be archived
#auto-archiver.policy.class = ch.systemsx.cisd.etlserver.plugins.DummyAutoArchiverPolicy
# use this discoverer to archive data sets in batches grouped by experiment
#auto-archiver.archive-candidate-discoverer.class = ch.systemsx.cisd.openbis.dss.generic.server.plugins.standard.archiver.ByExperimentArchiveCandidateDiscoverer
# the min-size in bytes, default is 0
#auto-archiver.archive-candidate-discoverer.minimal-archive-size =
# the max-size in bytes, default is 2^63-1. Set it to accommodate at least a couple of average data sets (see the sketch after this hunk)
#auto-archiver.archive-candidate-discoverer.maximal-archive-size =
# Maintenance task (performed only once) to create paths of existing data sets in pathinfo database
path-info-feeding.class = ch.systemsx.cisd.etlserver.path.PathInfoDatabaseFeedingTask
path-info-feeding.execute-only-once = true
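
To pin down the two optional archive-candidate-discoverer bounds above: a minimal sketch, using plain java.util.Properties rather than the cisd PropertyUtils the real code relies on, of how minimal-archive-size and maximal-archive-size default to 0 and 2^63-1 (Long.MAX_VALUE) when left unset. The class name is illustrative.

import java.util.Properties;

// Sketch only: mirrors the documented defaults of the two discoverer bounds.
public class DiscovererSizeBounds
{
    private static final String MINIMAL_ARCHIVE_SIZE = "minimal-archive-size";
    private static final String MAXIMAL_ARCHIVE_SIZE = "maximal-archive-size";

    final long minArchiveSize;
    final long maxArchiveSize;

    public DiscovererSizeBounds(Properties properties)
    {
        minArchiveSize = getLong(properties, MINIMAL_ARCHIVE_SIZE, 0L);             // default: 0
        maxArchiveSize = getLong(properties, MAXIMAL_ARCHIVE_SIZE, Long.MAX_VALUE); // default: 2^63-1
    }

    private static long getLong(Properties properties, String key, long defaultValue)
    {
        String value = properties.getProperty(key);
        return (value == null || value.trim().isEmpty()) ? defaultValue : Long.parseLong(value.trim());
    }
}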
IArchiveCandidateDiscoverer.java
package ch.systemsx.cisd.etlserver;
import java.util.List;
import java.util.Properties;
import ch.systemsx.cisd.openbis.dss.generic.shared.IEncapsulatedOpenBISService;
import ch.systemsx.cisd.openbis.generic.shared.basic.dto.AbstractExternalData;
import ch.systemsx.cisd.openbis.generic.shared.basic.dto.ArchiverDataSetCriteria;
/**
* Finds data sets that are possible candidates for archiving
* Finds data sets that are possible candidates for archiving. The implementing class must have a constructor accepting a single parameter of type
* {@link java.util.Properties}
*
* @author Sascha Fedorenko
*/
public interface IArchiveCandidateDiscoverer
{
/**
* Initialize the discoverer with specific properties
*
* @param properties
*/
void initialize(Properties properties);
/**
* Return a list of data sets that can be scheduled for archiving. This will be called periodically, so there is no need to return everything in one
* list. The first suitable subset is sufficient; make sure, though, that older data is returned first.
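
The constructor contract stated in the javadoc above suggests the discoverer is instantiated reflectively from its configured class name. What follows is a hedged sketch of such a factory; it mirrors the idea, not the actual AutoArchiverTask factory code, and the factory class name is made up.

import java.util.Properties;

// Hypothetical factory, not openBIS code: instantiates a discoverer through the
// Properties-taking constructor required by the interface contract above.
public class ArchiveCandidateDiscovererFactory
{
    public static IArchiveCandidateDiscoverer create(String className, Properties properties)
    {
        try
        {
            return (IArchiveCandidateDiscoverer) Class.forName(className)
                    .getConstructor(Properties.class)
                    .newInstance(properties);
        } catch (Exception ex)
        {
            throw new IllegalArgumentException("Cannot instantiate discoverer: " + className, ex);
        }
    }
}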
AgeArchiveCandidateDiscoverer.java
package ch.systemsx.cisd.etlserver.plugins;
import java.util.List;
import java.util.Properties;
import ch.systemsx.cisd.etlserver.IArchiveCandidateDiscoverer;
import ch.systemsx.cisd.openbis.dss.generic.shared.IEncapsulatedOpenBISService;
@@ -9,22 +8,16 @@ import ch.systemsx.cisd.openbis.generic.shared.basic.dto.AbstractExternalData;
import ch.systemsx.cisd.openbis.generic.shared.basic.dto.ArchiverDataSetCriteria;
/**
* Default archive candidate data set discoverer that simply finds all "old" data sets
* Default archive candidate data set discoverer that simply finds all old data sets, as specified by the criteria
*
* @author fedoreno
* @author Sascha Fedorenko
*/
public class AgeArchiveCandidateDiscoverer implements IArchiveCandidateDiscoverer
{
@Override
public List<AbstractExternalData> findDatasetsForArchiving(IEncapsulatedOpenBISService openBISService, ArchiverDataSetCriteria criteria)
{
return openBISService.listAvailableDataSets(criteria);
}
@Override
public void initialize(Properties properties)
{
}
}
AutoArchiverTask.java
@@ -107,7 +107,6 @@ public class AutoArchiverTask implements IMaintenanceTask
PropertyParametersUtil.extractSingleSectionProperties(properties,
DISCOVERY_SECTION_NAME, false);
archiveCandidateDiscoverer = createArchiveDatasetDiscoverer(discoverySectionProperties);
archiveCandidateDiscoverer.initialize(properties);
removeFromDataStore =
PropertyUtils.getBoolean(properties, REMOVE_DATASETS_FROM_STORE, false);
ByExperimentArchiveCandidateDiscoverer.java
package ch.systemsx.cisd.openbis.dss.generic.server.plugins.standard.archiver;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
@@ -9,23 +8,23 @@ import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.SortedMap;
import java.util.TreeMap;
import org.apache.commons.lang.time.DateUtils;
import org.apache.log4j.Logger;
import ch.systemsx.cisd.common.logging.LogCategory;
import ch.systemsx.cisd.common.logging.LogFactory;
import ch.systemsx.cisd.common.properties.ExtendedProperties;
import ch.systemsx.cisd.common.properties.PropertyUtils;
import ch.systemsx.cisd.etlserver.IArchiveCandidateDiscoverer;
import ch.systemsx.cisd.openbis.dss.generic.shared.IDataSetPathInfoProvider;
import ch.systemsx.cisd.openbis.dss.generic.shared.IEncapsulatedOpenBISService;
import ch.systemsx.cisd.openbis.generic.shared.api.v1.dto.SearchCriteria;
import ch.systemsx.cisd.openbis.generic.shared.api.v1.dto.SearchCriteria.CompareMode;
import ch.systemsx.cisd.openbis.generic.shared.api.v1.dto.SearchCriteria.MatchClause;
import ch.systemsx.cisd.openbis.generic.shared.api.v1.dto.SearchCriteria.MatchClauseAttribute;
import ch.systemsx.cisd.openbis.generic.shared.api.v1.dto.SearchCriteria.MatchClauseTimeAttribute;
import ch.systemsx.cisd.openbis.dss.generic.shared.ISingleDataSetPathInfoProvider;
import ch.systemsx.cisd.openbis.dss.generic.shared.ServiceProvider;
import ch.systemsx.cisd.openbis.generic.shared.basic.dto.AbstractExternalData;
import ch.systemsx.cisd.openbis.generic.shared.basic.dto.ArchiverDataSetCriteria;
import ch.systemsx.cisd.openbis.generic.shared.basic.dto.Experiment;
import ch.systemsx.cisd.openbis.generic.shared.basic.dto.Project;
/**
@@ -42,13 +41,15 @@ public class ByExperimentArchiveCandidateDiscoverer implements IArchiveCandidate
private static final String MAXIMAL_ARCHIVE_SIZE = "maximal-archive-size";
private static final int DEFAULT_MINIMAL_ARCHIVE_SIZE = 0;
private static final long DEFAULT_MINIMAL_ARCHIVE_SIZE = 0;
private static final int DEFAULT_MAXIMAL_ARCHIVE_SIZE = Integer.MAX_VALUE;
private static final long DEFAULT_MAXIMAL_ARCHIVE_SIZE = Long.MAX_VALUE;
private int minArchiveSize;
private long minArchiveSize;
private int maxArchiveSize;
private long maxArchiveSize;
private IDataSetPathInfoProvider pathInfoProvider;
private static class DatasetArchInfo implements Comparable<DatasetArchInfo>
{
@@ -65,65 +66,148 @@ public class ByExperimentArchiveCandidateDiscoverer implements IArchiveCandidate
}
}
public ByExperimentArchiveCandidateDiscoverer(ExtendedProperties properties)
{
minArchiveSize =
PropertyUtils.getLong(properties, MINIMAL_ARCHIVE_SIZE, DEFAULT_MINIMAL_ARCHIVE_SIZE);
maxArchiveSize =
PropertyUtils.getLong(properties, MAXIMAL_ARCHIVE_SIZE, DEFAULT_MAXIMAL_ARCHIVE_SIZE);
}
@Override
public List<AbstractExternalData> findDatasetsForArchiving(IEncapsulatedOpenBISService openbis, ArchiverDataSetCriteria criteria)
{
SearchCriteria sc = new SearchCriteria();
sc.addMatchClause(MatchClause.createAttributeMatch(MatchClauseAttribute.TYPE, criteria.tryGetDataSetTypeCode()));
List<AbstractExternalData> dataSets = openbis.listAvailableDataSets(criteria);
DatasetArchInfo[] sortedCandidates = organizeCandidates(dataSets);
SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
String dateBefore = dateFormat.format(DateUtils.addDays(new Date(), -criteria.getOlderThan()));
sc.addMatchClause(MatchClause.createTimeAttributeMatch(MatchClauseTimeAttribute.MODIFICATION_DATE, CompareMode.LESS_THAN_OR_EQUAL,
dateBefore, "0"));
if (sortedCandidates.length == 0)
{
return new ArrayList<AbstractExternalData>(0);
}
// TODO: not yet archived
// sc.addMatchClause(MatchClause.createAttributeMatch(MatchClauseAttribute., desiredValue));
SortedMap<Project, DatasetArchInfo> byProject = groupByProject(sortedCandidates);
List<AbstractExternalData> dataSets = openbis.searchForDataSets(sc);
boolean hadGoodCandidates = false;
for (Project p : byProject.keySet())
{
DatasetArchInfo projectSets = byProject.get(p);
if (projectSets.totalSize > minArchiveSize)
{
hadGoodCandidates = true;
if (projectSets.totalSize < maxArchiveSize)
{
return reportFind(projectSets.datasets);
}
List<AbstractExternalData> projectSubset = selectSuitableSubsetBySample(projectSets.datasets);
if (projectSubset.size() > 0)
{
return reportFind(projectSubset);
}
}
}
Map<Project, DatasetArchInfo> candidates = new HashMap<Project, DatasetArchInfo>();
if (hadGoodCandidates)
{
operationLog.info("Found datasets matching By Experiment archivation policy, but no subset fit within "
+ "MINIMAL_ARCHIVE_SIZE and MAXIMAL_ARCHIVE_SIZE criteria.");
}
return new ArrayList<AbstractExternalData>();
}
private List<AbstractExternalData> reportFind(List<AbstractExternalData> datasets)
{
for (AbstractExternalData ds : datasets)
{
operationLog.info("Will archive " + ds.getCode() + " with experiment " + ds.getExperiment().getCode());
}
return datasets;
}
private SortedMap<Project, DatasetArchInfo> groupByProject(DatasetArchInfo[] sortedCandidates)
{
SortedMap<Project, DatasetArchInfo> result = new TreeMap<Project, DatasetArchInfo>();
for (DatasetArchInfo info : sortedCandidates)
{
Project project = info.datasets.get(0).getExperiment().getProject();
DatasetArchInfo current = result.get(project);
if (current == null)
{
current = new DatasetArchInfo();
}
current.datasets.addAll(info.datasets);
current.totalSize += info.totalSize;
result.put(project, current);
}
return result;
}
private DatasetArchInfo[] organizeCandidates(List<AbstractExternalData> dataSets)
{
Map<Experiment, DatasetArchInfo> candidates = new HashMap<Experiment, DatasetArchInfo>();
for (AbstractExternalData ds : dataSets)
{
Project project = ds.getExperiment().getProject();
DatasetArchInfo candidate = candidates.get(project);
Experiment experiment = ds.getExperiment();
DatasetArchInfo candidate = candidates.get(experiment);
if (candidate == null)
{
candidate = new DatasetArchInfo();
}
candidate.datasets.add(ds);
candidate.totalSize += ds.getSize();
if (candidate.minDate.compareTo(ds.getModificationDate()) > 0)
Long size = ds.getSize();
if (size == null)
{
candidate.minDate = ds.getModificationDate();
ISingleDataSetPathInfoProvider dsInfoProvider = getDatasetPathInfoProvider().tryGetSingleDataSetPathInfoProvider(ds.getCode());
if (dsInfoProvider != null)
{
size = dsInfoProvider.getRootPathInfo().getSizeInBytes();
ds.setSize(size);
}
}
candidates.put(project, candidate);
}
DatasetArchInfo[] sortedCandidates = candidates.values().toArray(new DatasetArchInfo[candidates.size()]);
Arrays.sort(sortedCandidates);
for (DatasetArchInfo ai : sortedCandidates)
{
if (ai.totalSize > minArchiveSize)
if (size != null)
{
if (ai.totalSize < maxArchiveSize)
candidate.totalSize += size;
if (candidate.minDate.compareTo(ds.getModificationDate()) > 0)
{
return ai.datasets;
candidate.minDate = ds.getModificationDate();
}
return selectSuitableSubset(ai.datasets);
candidates.put(experiment, candidate);
} else
{
operationLog.warn("Failed determining data set size of " + ds.getCode() + ", cannot include it in archval candidates set.");
}
}
operationLog.info("No dataset collection matches By Experiment archivation policy.");
if (candidates.size() == 0)
{
return new DatasetArchInfo[0];
}
return new ArrayList<AbstractExternalData>();
DatasetArchInfo[] sortedCandidates = candidates.values().toArray(new DatasetArchInfo[candidates.size()]);
Arrays.sort(sortedCandidates);
return sortedCandidates;
}
private List<AbstractExternalData> selectSuitableSubset(List<AbstractExternalData> datasets)
private IDataSetPathInfoProvider getDatasetPathInfoProvider()
{
if (pathInfoProvider == null)
{
pathInfoProvider = ServiceProvider.getDataSetPathInfoProvider();
}
return pathInfoProvider;
}
private List<AbstractExternalData> selectSuitableSubsetBySample(List<AbstractExternalData> datasets)
{
ArrayList<AbstractExternalData> result = new ArrayList<AbstractExternalData>();
@@ -144,7 +228,7 @@ public class ByExperimentArchiveCandidateDiscoverer implements IArchiveCandidate
long curSize = 0;
for (AbstractExternalData ds : datasets)
{
if (curSize + ds.getSize() > maxArchiveSize)
if (curSize + ds.getSize() > maxArchiveSize && curSize > minArchiveSize)
{
return result;
}
@@ -152,21 +236,11 @@ public class ByExperimentArchiveCandidateDiscoverer implements IArchiveCandidate
curSize += ds.getSize();
}
operationLog.warn("Found datasets matching By Experiment archivation policy "
+ "but wasn't able to pick a subset for MAXIMAL_ARCHIVE_SIZE criteria.");
if (curSize < minArchiveSize)
{
return new ArrayList<AbstractExternalData>();
}
return result;
}
@Override
public void initialize(Properties properties)
{
minArchiveSize =
PropertyUtils.getInt(properties, MINIMAL_ARCHIVE_SIZE, DEFAULT_MINIMAL_ARCHIVE_SIZE);
maxArchiveSize =
PropertyUtils.getInt(properties, MAXIMAL_ARCHIVE_SIZE, DEFAULT_MAXIMAL_ARCHIVE_SIZE);
}
}