diff --git a/deep_sequencing_unit/source/java/ch/ethz/bsse/cisd/dsu/tracking/main/Parameters.java b/deep_sequencing_unit/source/java/ch/ethz/bsse/cisd/dsu/tracking/main/Parameters.java index 2513959a82952849e23e16cdc5e6a5dab84d36ce..188e8d55d9896397ad64d379e2defd9ee7bd93d7 100644 --- a/deep_sequencing_unit/source/java/ch/ethz/bsse/cisd/dsu/tracking/main/Parameters.java +++ b/deep_sequencing_unit/source/java/ch/ethz/bsse/cisd/dsu/tracking/main/Parameters.java @@ -1,5 +1,5 @@ /* - * Copyright 2009 ETH Zuerich, CISD + * Copyright 2016 ETH Zuerich, SIS * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,6 +26,7 @@ import ch.systemsx.cisd.common.properties.PropertyUtils; /** * @author Tomasz Pylak + * @author Manuel Kohler */ public class Parameters { @@ -40,7 +41,20 @@ public class Parameters private static final String TRACKING_ADMIN_EMAIL = "tracking-admin-email"; private static final String NOTIFICATION_EMAIL_FROM = "mail.from"; - + + private static final String SPACE_WHITELIST = "space-whitelist"; + + private static final String DBM_SPACE_PREFIX = "dbm-space-prefix"; + + // For Development Mode + private static final String DEBUG = "debug"; + + private static final String OLD_DATA_SET_BACKLOG_NUMBER = "old-data-set-backlog-number"; + + private static final String DATA_SET_TYPE_LIST = "dataset-type-list"; + + private static final String DESTINATION_FOLDER = "destination-folder"; + private final String openbisUser; private final String openbisPassword; @@ -54,6 +68,18 @@ public class Parameters private final String adminEmail; private final String notificationEmail; + + private final String spaceWhitelist; + + private final String dbmSpacePrefix; + + private final boolean debug; + + private final long oldDataSetBacklogNumber; + + private final String dataSetTypeList; + + private final String destinationFolder; public Parameters(Properties props) { @@ -64,6 +90,12 @@ public class Parameters this.mailClient = new MailClient(props); this.adminEmail = PropertyUtils.getProperty(props, TRACKING_ADMIN_EMAIL); this.notificationEmail = PropertyUtils.getProperty(props, NOTIFICATION_EMAIL_FROM); + this.spaceWhitelist = PropertyUtils.getProperty(props, SPACE_WHITELIST); + this.dbmSpacePrefix = PropertyUtils.getProperty(props, DBM_SPACE_PREFIX); + this.debug = PropertyUtils.getBoolean(props, DEBUG, false); + this.oldDataSetBacklogNumber = PropertyUtils.getInt(props, OLD_DATA_SET_BACKLOG_NUMBER, 0); + this.dataSetTypeList = PropertyUtils.getProperty(props, DATA_SET_TYPE_LIST); + this.destinationFolder= PropertyUtils.getProperty(props, DESTINATION_FOLDER); } public String getOpenbisUser() @@ -100,5 +132,34 @@ public class Parameters { return notificationEmail; } - -} + + public String getSpaceWhitelist() + { + return spaceWhitelist; + } + + public String getDbmSpacePrefix() + { + return dbmSpacePrefix; + } + + public boolean getDebug() + { + return debug; + } + + public long getoldDataSetBacklogNumber() + { + return oldDataSetBacklogNumber; + } + + public String getdataSetTypeList() + { + return dataSetTypeList; + } + + public String getDestinationFolder() + { + return destinationFolder; + } +} \ No newline at end of file diff --git a/deep_sequencing_unit/source/java/ch/ethz/bsse/cisd/dsu/tracking/main/TrackingBO.java b/deep_sequencing_unit/source/java/ch/ethz/bsse/cisd/dsu/tracking/main/TrackingBO.java index b2120d23d6bb9ef257be7744195093ee86b99650..aae72a179961b2bab2ede88719eb312ca4137fd8 100644 --- a/deep_sequencing_unit/source/java/ch/ethz/bsse/cisd/dsu/tracking/main/TrackingBO.java +++ b/deep_sequencing_unit/source/java/ch/ethz/bsse/cisd/dsu/tracking/main/TrackingBO.java @@ -16,12 +16,15 @@ package ch.ethz.bsse.cisd.dsu.tracking.main; +import java.io.File; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; + import java.util.Set; import java.util.TreeMap; import java.util.TreeSet; @@ -33,10 +36,12 @@ import ch.ethz.bsse.cisd.dsu.tracking.email.EmailWithSummary; import ch.ethz.bsse.cisd.dsu.tracking.email.IEntityTrackingEmailGenerator; import ch.ethz.bsse.cisd.dsu.tracking.utils.LogUtils; import ch.systemsx.cisd.common.collection.CollectionUtils; +import ch.systemsx.cisd.common.filesystem.rsync.RsyncCopier; import ch.systemsx.cisd.common.mail.EMailAddress; import ch.systemsx.cisd.common.mail.IMailClient; import ch.systemsx.cisd.openbis.generic.shared.ITrackingServer; import ch.systemsx.cisd.openbis.generic.shared.basic.dto.AbstractExternalData; +import ch.systemsx.cisd.openbis.generic.shared.basic.dto.DataSetArchivingStatus; import ch.systemsx.cisd.openbis.generic.shared.basic.dto.IEntityProperty; import ch.systemsx.cisd.openbis.generic.shared.basic.dto.Sample; import ch.systemsx.cisd.openbis.generic.shared.basic.dto.TrackingDataSetCriteria; @@ -51,7 +56,9 @@ import ch.systemsx.cisd.openbis.generic.shared.dto.identifier.SampleIdentifier; public class TrackingBO { - private static final String SEQUENCING_SAMPLE_TYPE = "ILLUMINA_SEQUENCING"; + private static final String PROPERTY_RUN_NAME_FOLDER = "RUN_NAME_FOLDER"; + + private static final String SEQUENCING_SAMPLE_TYPE = "ILLUMINA_SEQUENCING"; private static final String FLOW_LANE_SAMPLE_TYPE = "ILLUMINA_FLOW_LANE"; @@ -61,7 +68,7 @@ public class TrackingBO "LIBRARY_PROCESSING_SUCCESSFUL"; private static final String TRUE = "true"; - + private final ITrackingServer trackingServer; private final IEntityTrackingEmailGenerator emailGenerator; @@ -77,7 +84,7 @@ public class TrackingBO } public void trackAndNotify(ITrackingDAO trackingDAO, final HashMap<String, String[]> commandLineMap, - SessionContextDTO session) + Parameters params, SessionContextDTO session) { Boolean sendEmails = true; TrackingStateDTO prevTrackingState = trackingDAO.getTrackingState(); @@ -86,20 +93,27 @@ public class TrackingBO TrackedEntities changedEntities = null; List<EmailWithSummary> emailsWithSummary = null; + if (commandLineMap.get(TrackingClient.CL_PARAMETER_LANES) != null) { - changedEntities = fetchChangedDataSets(prevTrackingState, trackingServer, + changedEntities = fetchChangedDataSets(prevTrackingState, trackingServer, params, commandLineMap.get(TrackingClient.CL_PARAMETER_LANES), session); - } else if (commandLineMap.containsKey(TrackingClient.CL_PARAMETER_ALL)) + } + + else if (commandLineMap.containsKey(TrackingClient.CL_PARAMETER_ALL)) { // changedEntities = fetchChangedEntities(prevTrackingState, trackingServer, commandLineMap, session); System.out.println("This function is deactivated"); - } else if (commandLineMap.containsKey(TrackingClient.CL_PARAMETER_CHANGED_LANES)) + } + + // just list the potential chnaged lanes + else if (commandLineMap.containsKey(TrackingClient.CL_PARAMETER_CHANGED_LANES)) { - Map<String, String> changed_lanes = fetchChangedLanes(prevTrackingState, trackingServer, session); + Map<String, String> changed_lanes = fetchChangedLanes(prevTrackingState, trackingServer, params, session); sendEmails = false; - } else + } + else { LogUtils.debug("Should never be reached."); } @@ -108,7 +122,10 @@ public class TrackingBO { emailsWithSummary = emailGenerator.generateDataSetsEmails(changedEntities); sendEmails(emailsWithSummary, mailClient); - saveTrackingState(prevTrackingState, changedEntities, trackingDAO); + + if (!params.getDebug()) { + saveTrackingState(prevTrackingState, changedEntities, trackingDAO); + } } } @@ -223,21 +240,21 @@ public class TrackingBO } private static Map<String, String> fetchChangedLanes(TrackingStateDTO trackingState, - ITrackingServer trackingServer, SessionContextDTO session) + ITrackingServer trackingServer, Parameters params, SessionContextDTO session) { - long maxDataSetId = getMaxDataSetId(trackingState); - LogUtils.info("Using maximum DS techId " + maxDataSetId + " for search of changed data sets"); + long usableDataSetId = getUsableDataSetId(trackingState, params); + LogUtils.info("Using maximum DS techId " + usableDataSetId + " for search of changed data sets"); TrackingDataSetCriteria dataSetCriteria = - new TrackingDataSetCriteria(FLOW_LANE_SAMPLE_TYPE, maxDataSetId); + new TrackingDataSetCriteria(FLOW_LANE_SAMPLE_TYPE, usableDataSetId); List<AbstractExternalData> dataSets = trackingServer.listDataSets(session.getSessionToken(), dataSetCriteria); Map<String, String> changedLanesMap = new HashMap<String, String>(); - + // Loop over all new data sets for (AbstractExternalData d : dataSets) - { + { Long newDataSetID = d.getId(); Sample lane = d.getSample(); String lanePermId = lane.getPermId(); @@ -253,7 +270,7 @@ public class TrackingBO List<IEntityProperty> flowcellProperties = flowcell.getProperties(); for (IEntityProperty property : flowcellProperties) { - if (property.getPropertyType().getCode().equals("RUN_NAME_FOLDER")) + if (property.getPropertyType().getCode().equals(PROPERTY_RUN_NAME_FOLDER)) { runNameFolder = property.getValue(); break; @@ -261,9 +278,28 @@ public class TrackingBO } String laneString = currentLaneId.toString().split(":")[1]; changedLanesMap.put(runNameFolder + ":" + laneString, laneSpace + " " + lane.getCode()); - LogUtils.info("DataSetID: " + newDataSetID + " of NEW data Sets > MAX DataSet id for this sample: " + maxDatasetIdForSample); + LogUtils.info("DataSetID: " + newDataSetID + " of NEW data Sets > MAX DataSet id for this sample: " + maxDatasetIdForSample); } } + + + +// Properties properties = new Properties(); +// File storeRoot = null; +// +// DataSetCopier dsc = new DataSetCopier(properties, storeRoot); +// dsc.process(description, context); + +// PhysicalDataSet pds = d.tryGetAsDataSet(); +// +// DataSetProcessingContext context = null; +// +// DatasetDescription description = new DatasetDescription(); +// description.setDataSetCode(d.getCode()); +// description.setDatasetTypeCode(d.getDataSetType().getCode()); +// description.setDataSetLocation(d.tryGetAsDataSet().getDataSetLocation()); + + Set<Map.Entry<String, String>> entrySet = changedLanesMap.entrySet(); for (Entry<String, String> entry : entrySet) { @@ -275,18 +311,22 @@ public class TrackingBO } private static TrackedEntities fetchChangedDataSets(TrackingStateDTO trackingState, - ITrackingServer trackingServer, String[] laneCodeList, SessionContextDTO session) + ITrackingServer trackingServer, Parameters params, String[] laneCodeList, SessionContextDTO session) { - long maxDataSetId = getMaxDataSetId(trackingState); - LogUtils.info("Using maximum DS techId " + maxDataSetId + " for search of changed data sets"); + long usableDataSetId = getUsableDataSetId(trackingState, params); + List<String> spaceWhiteList = Arrays.asList(params.getSpaceWhitelist().split("\\s*,\\s*")); + List<String> datasetTypeList = Arrays.asList(params.getdataSetTypeList().split("\\s*,\\s*")); + + LogUtils.info("Using maximum DS techId " + usableDataSetId + " for search of changed data sets"); TrackingDataSetCriteria dataSetCriteria = - new TrackingDataSetCriteria(FLOW_LANE_SAMPLE_TYPE, maxDataSetId); + new TrackingDataSetCriteria(FLOW_LANE_SAMPLE_TYPE, usableDataSetId); List<AbstractExternalData> dataSets = trackingServer.listDataSets(session.getSessionToken(), dataSetCriteria); ArrayList<SampleIdentifier> filterList = new ArrayList<SampleIdentifier>(); ArrayList<AbstractExternalData> filteredDataSets = new ArrayList<AbstractExternalData>(); + ArrayList<AbstractExternalData> toTransferDataSets = new ArrayList<AbstractExternalData>(); HashMap<String, ArrayList<Long>> changedTrackingMap = new HashMap<String, ArrayList<Long>>(); // Loop over all lanes and create a list of relevant lanes @@ -309,12 +349,38 @@ public class TrackingBO { LogUtils.info("DataSetID: " + newDataSetID + " of NEW data Sets > MAX DataSet id for this sample: " + maxDatasetIdForSample); filteredDataSets.add(d); + + if (spaceWhiteList.contains(d.getSpace().getCode()) || d.getSpace().getCode().startsWith(params.getDbmSpacePrefix())) { + if (datasetTypeList.contains(d.getDataSetType().getCode())) { + if (d.tryGetAsDataSet().getStatus().equals(DataSetArchivingStatus.AVAILABLE) || d.tryGetAsDataSet().getStatus().equals(DataSetArchivingStatus.LOCKED)) { + toTransferDataSets.add(d); + } + else { + LogUtils.error("Data set " + d.getCode() + " eventually archived!"); + } + } + } + } System.out.println("Sending Email for " + newDataSetID + " which is part of " + d.getSampleCode()); addDataSetTo(changedTrackingMap, d); - } + } + + LogUtils.info("TO_TRANSFER: Found " + toTransferDataSets.size() + " data sets which are to be transferred to an extra folder"); + + + + //TODO: extract and make robust + File rsyncBinary = new File("/opt/local/bin/rsync"); + File destination = new File(params.getDestinationFolder()); + RsyncCopier copier = new RsyncCopier(rsyncBinary, null, "-a"); + + for (AbstractExternalData ds : dataSets) { + File source = new File(ds.tryGetAsDataSet().getFullLocation()); + copier.copy(source, destination, null, null); + LogUtils.info("Copying " + ds.getCode() + " to " + params.getDestinationFolder()); + } - LogUtils.info(changedTrackingMap.toString()); LogUtils.info("Found " + filteredDataSets.size() + " data sets which are connected to samples in " + filterList.toString()); return gatherTrackedEntities(trackingState, trackingServer, session, filteredDataSets, changedTrackingMap); } @@ -326,9 +392,23 @@ public class TrackingBO { maxDataSetId = Math.max(maxDataSetId, id); } - return 0; - // return maxDataSetId; +// return 0; + return maxDataSetId; + } + + /* Little helper function which reduces the number of data sets we are looking at. This can be configured + * by value 'old-data-set-backlog-number' in the service.properties. Without this value + * the calls get slower and slower with the growing data set amount. But we can assume that older data sets + * already got triggered earlier to send out an email. If not, the sample is so old that we do not want + * to send an email. + */ + private static long getUsableDataSetId (TrackingStateDTO trackingState, Parameters params) { + long maxDataSetId = getMaxDataSetId(trackingState); + long oldDataSetBacklogNumber = params.getoldDataSetBacklogNumber(); + long usableDataSetId = Math.max(maxDataSetId - oldDataSetBacklogNumber, 0); + return usableDataSetId; } + private static long getMaxDataSetIdForSample(TrackingStateDTO trackingState, String lanePermId) { diff --git a/deep_sequencing_unit/source/java/ch/ethz/bsse/cisd/dsu/tracking/main/TrackingClient.java b/deep_sequencing_unit/source/java/ch/ethz/bsse/cisd/dsu/tracking/main/TrackingClient.java index d7b40b64d2d6c7e49f6c45d398bbd885f01e4491..c7a246076efabb887757b74dd39d4e1cdea6fdab 100644 --- a/deep_sequencing_unit/source/java/ch/ethz/bsse/cisd/dsu/tracking/main/TrackingClient.java +++ b/deep_sequencing_unit/source/java/ch/ethz/bsse/cisd/dsu/tracking/main/TrackingClient.java @@ -153,7 +153,7 @@ public class TrackingClient ITrackingDAO trackingDAO = new FileBasedTrackingDAO(LOCAL_SAMPLE_DB, LOCAL_DATASET_DB); - trackingBO.trackAndNotify(trackingDAO, commandLineMap, session); + trackingBO.trackAndNotify(trackingDAO, commandLineMap, params, session); } private static ITrackingServer createOpenBISTrackingServer(Parameters params) diff --git a/deep_sequencing_unit/source/java/ch/ethz/bsse/cisd/dsu/tracking/utils/LogUtils.java b/deep_sequencing_unit/source/java/ch/ethz/bsse/cisd/dsu/tracking/utils/LogUtils.java index cfd7d992e099e32ba5f37539acd07018625fde1f..1e123289452243c46d521fc8489069d2f1e2fb3f 100644 --- a/deep_sequencing_unit/source/java/ch/ethz/bsse/cisd/dsu/tracking/utils/LogUtils.java +++ b/deep_sequencing_unit/source/java/ch/ethz/bsse/cisd/dsu/tracking/utils/LogUtils.java @@ -80,4 +80,9 @@ public class LogUtils { operationLog.debug(msg); } + + public static void error(String msg) + { + operationLog.error(msg); + } } diff --git a/deep_sequencing_unit/tracking/dist/etc/service.properties b/deep_sequencing_unit/tracking/dist/etc/service.properties index 060c04f32f0dfe2165b99b0b8e140a00811d9a39..0efa4f4114a2c712741b7e937eba37cb994092b2 100644 --- a/deep_sequencing_unit/tracking/dist/etc/service.properties +++ b/deep_sequencing_unit/tracking/dist/etc/service.properties @@ -26,3 +26,22 @@ tracking-admin-email = kohleman@ethz.ch, gpawel@ethz.ch FMI-affiliation-notification-email-contact = jython-version=2.7 + +# if a sample is part of this space list then it will be transferred to an extra folder +space-whitelist = SPACE1, SPACE2 + +# All DBM spaces start with this string and will be copied to an extra folder +dbm-space-prefix = DBM_ + +# boolean flag used to make a developer life easier +debug = true + +# go back up to this number when searching for older data sets, used to speed up the request. +# The lower this number the faster, but you might miss older data sets +old-data-set-backlog-number = 1000 + +# List of DataSet Types which should be copied into an extra folder +dataset-type-list = FASTQ_GZ + +# folder where the data should be copied to +destination-folder = /tmp/ \ No newline at end of file