diff --git a/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/etl/BatchDataSetHandler.java b/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/etl/BatchDataSetHandler.java index 905330f4885612c24ddcfbd334ee9f79d6301aff..695edea065abbf8c1a3bb2fe898dfcdfccce74a6 100644 --- a/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/etl/BatchDataSetHandler.java +++ b/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/etl/BatchDataSetHandler.java @@ -38,7 +38,9 @@ import ch.systemsx.cisd.yeastx.etl.DatasetMappingUtil.DataSetMappingInformationF /** * {@link IDataSetHandler} implementation which for each dataset directory reads all the files - * inside that directory and runs the primary dataset handler for it. + * inside that directory and runs the primary dataset handler for it.<br> + * Following properties can be configured:<br> {@link PreprocessingExecutor#PREPROCESSING_SCRIPT_PATH} - + * the path to the script which acquires write access. * * @author Tomasz Pylak */ @@ -50,13 +52,18 @@ public class BatchDataSetHandler implements IDataSetHandler private final DatasetMappingResolver datasetMappingResolver; + // the script which ensures that we have write access to the datasets + private final PreprocessingExecutor writeAccessSetter; + public BatchDataSetHandler(Properties parentProperties, IDataSetHandler delegator, IEncapsulatedOpenBISService openbisService) { this.delegator = delegator; this.mailClient = new MailClient(parentProperties); + Properties specificProperties = getSpecificProperties(parentProperties); this.datasetMappingResolver = - new DatasetMappingResolver(getSpecificProperties(parentProperties), openbisService); + new DatasetMappingResolver(specificProperties, openbisService); + this.writeAccessSetter = PreprocessingExecutor.create(specificProperties); } private static Properties getSpecificProperties(Properties properties) @@ -65,45 +72,108 @@ public class BatchDataSetHandler implements IDataSetHandler true); } - public List<DataSetInformation> handleDataSet(File 
datasetsParentDir) + public List<DataSetInformation> handleDataSet(File batchDir) { - if (canBatchBeProcessed(datasetsParentDir) == false) + if (canBatchBeProcessed(batchDir) == false) { return createEmptyResult(); } - LogUtils log = new LogUtils(datasetsParentDir); - DataSetMappingInformationFile datasetMappingFile = - DatasetMappingUtil.tryGetDatasetsMapping(datasetsParentDir, log); - if (datasetMappingFile == null || datasetMappingFile.tryGetMappings() == null) + LogUtils log = new LogUtils(batchDir); + DataSetMappingInformationFile mappingFile = + DatasetMappingUtil.tryGetDatasetsMapping(batchDir, log); + if (mappingFile == null || mappingFile.tryGetMappings() == null) { - touchErrorMarkerFile(datasetsParentDir, log); - sendNotificationsIfNecessary(log, tryGetEmail(datasetMappingFile)); - return createEmptyResult(); + return flushErrors(batchDir, mappingFile, log); + } + if (callPreprocessingScript(batchDir, log) == false) + { + return flushErrors(batchDir, mappingFile, log); + } + return processDatasets(batchDir, log, mappingFile.tryGetMappings(), mappingFile + .getNotificationEmail()); + } + + private ArrayList<DataSetInformation> flushErrors(File batchDir, + DataSetMappingInformationFile datasetMappingFileOrNull, LogUtils log) + { + touchErrorMarkerFile(batchDir, log); + sendNotificationsIfNecessary(log, tryGetEmail(datasetMappingFileOrNull)); + return createEmptyResult(); + } + + // false if script failed + private boolean callPreprocessingScript(File batchDir, LogUtils log) + { + boolean ok = writeAccessSetter.execute(batchDir.getName()); + if (ok == false) + { + log.error("No datasets from '%s' directory can be processed because " + + "the try to acquire write access by openBIS has failed. 
" + + "Try again or contact your administrator.", batchDir.getName()); } - return processDatasets(datasetsParentDir, log, datasetMappingFile.tryGetMappings(), - datasetMappingFile.getNotificationEmail()); + return ok; } - private List<DataSetInformation> processDatasets(File datasetsParentDir, LogUtils log, + private List<DataSetInformation> processDatasets(File batchDir, LogUtils log, TableMap<String, DataSetMappingInformation> mappings, String notificationEmail) { List<DataSetInformation> processedDatasetFiles = createEmptyResult(); Set<String> processedFiles = new HashSet<String>(); - List<File> files = listAll(datasetsParentDir); + List<File> files = listAll(batchDir); for (File file : files) { + // we have already tries to acquire write access to all files in batch directory, + // but some new files may have appeared since that time. + boolean isWritable = acquireWriteAccess(batchDir, file); + if (isWritable == false) + { + logNonWritable(file, log); + continue; + } if (canDatasetBeProcessed(file, mappings, log)) { processedDatasetFiles.addAll(delegator.handleDataSet(file)); processedFiles.add(file.getName().toLowerCase()); } } - clean(datasetsParentDir, processedFiles, log, mappings.values().size()); + clean(batchDir, processedFiles, log, mappings.values().size()); sendNotificationsIfNecessary(log, notificationEmail); return processedDatasetFiles; } + private void logNonWritable(File file, LogUtils log) + { + log.error("Could not acquire write access to '%s'. " + + "Try again or contact your administrator.", file.getPath()); + } + + // Acquires write access if the file is not writable. + // Returns true if file is writable afterwards. 
+    private boolean acquireWriteAccess(File batchDir, File file)
+    {
+        if (isWritable(file) == false)
+        {
+            String path =
+                    batchDir.getName() + System.getProperty("file.separator") + file.getName();
+            boolean ok = writeAccessSetter.execute(path);
+            if (ok == false)
+            {
+                LogUtils.adminError("Cannot acquire write access to '%s' "
+                        + "because write access setter failed", path);
+            }
+            return isWritable(file);
+        } else
+        {
+            return true;
+        }
+    }
+
+    private static boolean isWritable(File file)
+    {
+        return file.canWrite();
+    }
+
     private void sendNotificationsIfNecessary(LogUtils log, String email)
     {
         log.sendNotificationsIfNecessary(mailClient, email);
@@ -120,19 +190,19 @@ public class BatchDataSetHandler implements IDataSetHandler
         return new ArrayList<DataSetInformation>();
     }
 
-    private static boolean canBatchBeProcessed(File parentDir)
+    // true if we deal with a directory which contains no error marker file and is not empty
+    private static boolean canBatchBeProcessed(File batchDir)
     {
-        if (parentDir.isDirectory() == false)
+        if (batchDir.isDirectory() == false)
         {
             return false;
         }
-        if (errorMarkerFileExists(parentDir))
+        if (errorMarkerFileExists(batchDir))
         {
             return false;
         }
-        List<File> files = listAll(parentDir);
+        List<File> files = listAll(batchDir);
         // Do not treat empty directories as faulty.
-
         // The other reason of this check is that this handler is sometimes no able to delete
         // processed directories. It happens when they are mounted on NAS and there are some
         // hidden .nfs* files.
@@ -143,35 +213,34 @@ public class BatchDataSetHandler implements IDataSetHandler return true; } - private static boolean errorMarkerFileExists(File datasetsParentDir) + private static boolean errorMarkerFileExists(File batchDir) { - return new File(datasetsParentDir, ERROR_MARKER_FILE).isFile(); + return new File(batchDir, ERROR_MARKER_FILE).isFile(); } - private static void cleanMappingFile(File datasetsParentDir, Set<String> processedFiles, - LogUtils log) + private static void cleanMappingFile(File batchDir, Set<String> processedFiles, LogUtils log) { - DatasetMappingUtil.cleanMappingFile(datasetsParentDir, processedFiles, log); + DatasetMappingUtil.cleanMappingFile(batchDir, processedFiles, log); } - private static void clean(File datasetsParentDir, Set<String> processedFiles, LogUtils log, + private static void clean(File batchDir, Set<String> processedFiles, LogUtils log, int datasetMappingsNumber) { - cleanMappingFile(datasetsParentDir, processedFiles, log); + cleanMappingFile(batchDir, processedFiles, log); int unprocessedDatasetsCounter = datasetMappingsNumber - processedFiles.size(); - if (unprocessedDatasetsCounter == 0 && hasNoPotentialDatasetFiles(datasetsParentDir)) + if (unprocessedDatasetsCounter == 0 && hasNoPotentialDatasetFiles(batchDir)) { - cleanDatasetsDir(datasetsParentDir); + cleanDatasetsDir(batchDir, log); } else { - touchErrorMarkerFile(datasetsParentDir, log); + touchErrorMarkerFile(batchDir, log); } } - private static void touchErrorMarkerFile(File parentDir, LogUtils log) + private static void touchErrorMarkerFile(File batchDir, LogUtils log) { - File errorMarkerFile = new File(parentDir, ERROR_MARKER_FILE); + File errorMarkerFile = new File(batchDir, ERROR_MARKER_FILE); if (errorMarkerFile.isFile()) { return; @@ -189,30 +258,29 @@ public class BatchDataSetHandler implements IDataSetHandler .getPath()); } else { - log.warning( - "Correct the errors and delete the '%s' file to start processing again.", + log.warning("Correct the errors 
and delete the '%s' file to start processing again.", ERROR_MARKER_FILE); } } - private static void cleanDatasetsDir(File datasetsParentDir) + private static void cleanDatasetsDir(File batchDir, LogUtils log) { - LogUtils.deleteUserLog(datasetsParentDir); - DatasetMappingUtil.deleteMappingFile(datasetsParentDir); - deleteEmptyDir(datasetsParentDir); + LogUtils.deleteUserLog(batchDir); + DatasetMappingUtil.deleteMappingFile(batchDir, log); + deleteEmptyDir(batchDir); } // Checks that the sample from the mapping exists and is assigned to the experiment - we do not // want to move datasets to unidentified directory in this case. - private boolean canDatasetBeProcessed(File file, + private boolean canDatasetBeProcessed(File dataset, TableMap<String, DataSetMappingInformation> datasetsMapping, LogUtils log) { - if (DatasetMappingUtil.isMappingFile(file)) + if (DatasetMappingUtil.isMappingFile(dataset)) { return false; } DataSetMappingInformation mapping = - DatasetMappingUtil.tryGetDatasetMapping(file, datasetsMapping); + DatasetMappingUtil.tryGetDatasetMapping(dataset, datasetsMapping); if (mapping == null) { return false; @@ -220,7 +288,7 @@ public class BatchDataSetHandler implements IDataSetHandler return datasetMappingResolver.isMappingCorrect(mapping, log); } - private static void deleteEmptyDir(File dir) + private static boolean deleteEmptyDir(File dir) { boolean ok = dir.delete(); if (ok == false) @@ -229,11 +297,12 @@ public class BatchDataSetHandler implements IDataSetHandler "The directory '%s' cannot be deleted although it seems to be empty.", dir .getPath()); } + return ok; } - private static boolean hasNoPotentialDatasetFiles(File dir) + private static boolean hasNoPotentialDatasetFiles(File batchDir) { - List<File> files = listAll(dir); + List<File> files = listAll(batchDir); int datasetsCounter = files.size(); for (File file : files) { diff --git a/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/etl/DatasetMappingUtil.java 
b/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/etl/DatasetMappingUtil.java index ad0e69e13321315030c368d3578b7bdf6aead966..fcb330eca66660b39b54f5e8b30aac0524c28e05 100644 --- a/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/etl/DatasetMappingUtil.java +++ b/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/etl/DatasetMappingUtil.java @@ -168,15 +168,11 @@ class DatasetMappingUtil } } - public static DataSetMappingInformationFile tryGetDatasetsMapping(File parentDir, LogUtils log) + public static DataSetMappingInformationFile tryGetDatasetsMapping(File batchDir, LogUtils log) { - File mappingFile = tryGetMappingFile(parentDir); + File mappingFile = tryGetMappingFile(batchDir, log); if (mappingFile == null) { - log.error("No datasets from the directory '%s' can be processed " - + "because a file with extension '%s' which contains dataset descriptions " - + "does not exist or there is more than one file with taht extension.", - parentDir.getName(), CollectionUtils.abbreviate(MAPPING_FILE_EXTENSIONS, -1)); return null; } String notificationEmail = tryGetEmail(mappingFile, log); @@ -208,19 +204,29 @@ class DatasetMappingUtil return false; } - private static File tryGetMappingFile(File parentDir) + private static File tryGetMappingFile(File batchDir, LogUtils log) { - List<File> potentialMappingFiles = listPotentialMappingFiles(parentDir); - if (potentialMappingFiles.size() != 1) + List<File> potentialMappingFiles = listPotentialMappingFiles(batchDir); + String errorMsgPrefix = "No datasets from the directory '%s' can be processed because "; + String batchDirName = batchDir.getName(); + if (potentialMappingFiles.size() == 0) { + log.error(errorMsgPrefix + + "there is no file with extension '%s' which contains dataset descriptions.", + batchDirName, CollectionUtils.abbreviate(MAPPING_FILE_EXTENSIONS, -1)); return null; } - File indexFile = potentialMappingFiles.get(0); - if (indexFile.isFile() == false) + + if (potentialMappingFiles.size() > 1) { + log.error(errorMsgPrefix 
+ "there is more than one file with extension '%s'.", + batchDirName, CollectionUtils.abbreviate(MAPPING_FILE_EXTENSIONS, -1)); return null; - } else if (indexFile.canWrite() == false) + } + File indexFile = potentialMappingFiles.get(0); + if (indexFile.isFile() == false) { + log.error(errorMsgPrefix + "'%s' is not a file.", batchDirName, indexFile.getName()); return null; } else { @@ -233,13 +239,14 @@ class DatasetMappingUtil return FileUtilities.listFiles(dataSet, MAPPING_FILE_EXTENSIONS, false, null); } - public static void deleteMappingFile(File parentDir) + public static boolean deleteMappingFile(File batchDir, LogUtils log) { - File mappingFile = tryGetMappingFile(parentDir); + File mappingFile = tryGetMappingFile(batchDir, log); if (mappingFile != null && mappingFile.isFile()) { - mappingFile.delete(); + return mappingFile.delete(); } + return true; } /** @@ -247,9 +254,9 @@ class DatasetMappingUtil * * @param processedFiles files which should be removed from the mapping file */ - public static void cleanMappingFile(File parentDir, Set<String> processedFiles, LogUtils log) + public static void cleanMappingFile(File batchDir, Set<String> processedFiles, LogUtils log) { - File mappingFile = tryGetMappingFile(parentDir); + File mappingFile = tryGetMappingFile(batchDir, log); if (mappingFile == null) { return; diff --git a/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/etl/LogUtils.java b/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/etl/LogUtils.java index e6446deccdbc770b82419912cfb31a8c3cf76b1e..e437634cef7d0dc2e928b4bf531159757cceef6b 100644 --- a/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/etl/LogUtils.java +++ b/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/etl/LogUtils.java @@ -90,7 +90,7 @@ class LogUtils private void appendNotification(String messageFormat, Object... 
arguments) { errorMessages.append(String.format(messageFormat, arguments)); - errorMessages.append("\n"); + errorMessages.append("\r\n"); } /** has to be called at the end to send all notifications in one email */ @@ -156,7 +156,7 @@ class LogUtils Object... arguments) { String now = new Date().toString(); - String message = now + " " + messageKind + ": " + format(messageFormat, arguments) + "\n"; + String message = now + " " + messageKind + ": " + format(messageFormat, arguments) + "\r\n"; return message; } @@ -190,12 +190,13 @@ class LogUtils return file.getName().equals(ConstantsYeastX.USER_LOG_FILE); } - public static void deleteUserLog(File loggingDir) + public static boolean deleteUserLog(File loggingDir) { File file = getUserLogFile(loggingDir); if (file.isFile()) { - file.delete(); + return file.delete(); } + return true; } } diff --git a/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/etl/PreprocessingExecutor.java b/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/etl/PreprocessingExecutor.java new file mode 100644 index 0000000000000000000000000000000000000000..3706b20c7b792aa24053cec86fcd2433d565329b --- /dev/null +++ b/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/etl/PreprocessingExecutor.java @@ -0,0 +1,81 @@ +/* + * Copyright 2009 ETH Zuerich, CISD + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package ch.systemsx.cisd.yeastx.etl; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Properties; + +import org.apache.log4j.Logger; + +import ch.systemsx.cisd.common.exceptions.EnvironmentFailureException; +import ch.systemsx.cisd.common.logging.LogCategory; +import ch.systemsx.cisd.common.logging.LogFactory; +import ch.systemsx.cisd.common.process.ProcessExecutionHelper; + +/** + * A class which is configured from properties and is able to execute a script from the command line + * using the configured path {@link #PREPROCESSING_SCRIPT_PATH}. + * + * @author Tomasz Pylak + */ +public class PreprocessingExecutor +{ + /** + * A path to a script which should be called from command line for every dataset batch before it + * is processed. Can be used e.g. to change file permissions. The script gets one parameter, the + * path to the dataset file, relative to the incoming directory. + */ + public final static String PREPROCESSING_SCRIPT_PATH = "preprocessing-script"; + + public static PreprocessingExecutor create(Properties properties) + { + String preprocessingScriptPath = properties.getProperty(PREPROCESSING_SCRIPT_PATH); + if (preprocessingScriptPath != null) + { + return new PreprocessingExecutor(preprocessingScriptPath); + } else + { + throw EnvironmentFailureException.fromTemplate("Property '%s' is not set!", + PREPROCESSING_SCRIPT_PATH); + } + } + + private final String preprocessingScriptPath; + + private PreprocessingExecutor(String preprocessingScriptPath) + { + this.preprocessingScriptPath = preprocessingScriptPath; + } + + public boolean execute(String filePath) + { + return callScript(preprocessingScriptPath, getClass(), filePath); + } + + private static boolean callScript(String scriptPath, Class<?> logClass, String... 
args) + { + List<String> cmd = new ArrayList<String>(); + cmd.add(scriptPath); + cmd.addAll(Arrays.asList(args)); + Logger operationLog = LogFactory.getLogger(LogCategory.OPERATION, logClass); + Logger machineLog = LogFactory.getLogger(LogCategory.MACHINE, logClass); + return ProcessExecutionHelper.runAndLog(cmd, operationLog, machineLog); + } + +} \ No newline at end of file