diff --git a/datastore_server/source/java/ch/systemsx/cisd/etlserver/plugins/HierarchicalStorageUpdater.java b/datastore_server/source/java/ch/systemsx/cisd/etlserver/plugins/HierarchicalStorageUpdater.java index 45619f59765ba7274174728dc42fd59d49c474fe..0a116ef2358057df408b6d62393d33b7356ae95e 100644 --- a/datastore_server/source/java/ch/systemsx/cisd/etlserver/plugins/HierarchicalStorageUpdater.java +++ b/datastore_server/source/java/ch/systemsx/cisd/etlserver/plugins/HierarchicalStorageUpdater.java @@ -18,9 +18,15 @@ package ch.systemsx.cisd.etlserver.plugins; import java.io.File; import java.io.IOException; +import java.text.DateFormat; +import java.text.ParseException; +import java.util.ArrayList; import java.util.Collection; +import java.util.Date; import java.util.HashMap; import java.util.HashSet; +import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.Properties; import java.util.Set; @@ -39,9 +45,11 @@ import ch.systemsx.cisd.common.maintenance.IDataStoreLockingMaintenanceTask; import ch.systemsx.cisd.common.properties.ExtendedProperties; import ch.systemsx.cisd.common.properties.PropertyUtils; import ch.systemsx.cisd.common.reflection.ClassUtils; +import ch.systemsx.cisd.openbis.dss.generic.server.MetaDataBuilder; import ch.systemsx.cisd.openbis.dss.generic.shared.IEncapsulatedOpenBISService; import ch.systemsx.cisd.openbis.dss.generic.shared.ServiceProvider; import ch.systemsx.cisd.openbis.dss.generic.shared.utils.DssPropertyParametersUtil; +import ch.systemsx.cisd.openbis.generic.shared.basic.dto.AbstractExternalData; import ch.systemsx.cisd.openbis.generic.shared.dto.SimpleDataSetInformationDTO; /** @@ -62,6 +70,16 @@ public class HierarchicalStorageUpdater implements IDataStoreLockingMaintenanceT public static final String LINK_FROM_FIRST_CHILD = "link-from-first-child"; + /** + * Property indicating if only links should be created, rather than a directory with link and meta-data file. 
+ */ + public static final String LINKS_ONLY = "links-only"; + + /** + * Name of the link to create in a directory if links-only is set to false + */ + public static final String LINK_DIRECTORY = "data"; + private static final String REBUILDING_HIERARCHICAL_STORAGE = "Rebuilding hierarchical storage"; private static final Logger operationLog = @@ -98,6 +116,8 @@ public class HierarchicalStorageUpdater implements IDataStoreLockingMaintenanceT private File hierarchyRoot; + private boolean storeLinksOnly; + private Map<String /* data set type */, LinkSourceDescriptor> linkSourceDescriptors; @Override @@ -117,6 +137,7 @@ public class HierarchicalStorageUpdater implements IDataStoreLockingMaintenanceT storeRoot = new File(storeRootFileName); hierarchyRoot = new File(hierarchyRootFileName); linkSourceDescriptors = initializeLinkSourceDescriptors(pluginProperties); + storeLinksOnly = Boolean.parseBoolean(pluginProperties.getProperty(LINKS_ONLY)); /* an absent property means false (directory layout) */ operationLog.info("Plugin initialized with: store root = " + storeRootFileName + ", hierarchy root = " + hierarchyRootFileName); @@ -189,35 +210,167 @@ public class HierarchicalStorageUpdater implements IDataStoreLockingMaintenanceT private void rebuildHierarchy() { operationLog.info(REBUILDING_HIERARCHICAL_STORAGE); - Map<String, String> newLinkMappings = convertDataToLinkMappings(); - Set<String> toCreate = new HashSet<String>(newLinkMappings.keySet()); - Set<String> toDelete = linkNamingStrategy.extractPaths(hierarchyRoot); - Set<String> dontTouch = intersection(toCreate, toDelete); - toCreate.removeAll(dontTouch); - toDelete.removeAll(dontTouch); - removeUnnecessaryMappings(newLinkMappings, toCreate); - deleteObsoleteLinks(toDelete); - createLinksForChangedData(newLinkMappings); + List<DataSetInformation> newLinkMappings = collectDataSet(); + + Set<String> existingPaths = linkNamingStrategy.extractPaths(hierarchyRoot); + + for (DataSetInformation dataSetInformation : newLinkMappings) + { + String targetPath = 
dataSetInformation.targetFile.getAbsolutePath(); + if (existingPaths.contains(targetPath)) + { + existingPaths.remove(targetPath); + handleExistingEntry(dataSetInformation); + } else + { + handleNonExistingEntry(dataSetInformation); + } + } + + // by this time - only paths which should be deleted are left in the existingPaths + deleteObsoleteLinks(existingPaths); + } + + private void handleNonExistingEntry(DataSetInformation info) + { + if (storeLinksOnly) + { + createLink(info.targetFile, info.linkSource); + } else + { + createDataSetFolder(info); + } + } + + /** + * Handle a case when the data set directory already exists in the hierarchical store. In this case we should check if the metadata file is up to + * date and recreate it if necessary + */ + private void handleExistingEntry(DataSetInformation info) + { + String errorMsgLinksOnlyModeChanged = "The state of hierarchical store is corrupted or property '" + LINKS_ONLY + + "' has been modified after hierarchical store has been built. 
In this case please delete the hierarchical store directory and it will be recreated."; + if (storeLinksOnly) + { + if (FileUtilities.isSymbolicLink(info.targetFile)) + { + // nothing to do as the link is already in place + return; + } else + { + throw new IllegalStateException(errorMsgLinksOnlyModeChanged); + } + } else + { + if (info.targetFile.isDirectory()) + { + Date storedModificationDate = getModificationDateFromFile(info); + if (storedModificationDate == null || storedModificationDate.before(info.dto.getModificationDate())) + { + createDataSetFolder(info); + } + } else + { + throw new IllegalStateException(errorMsgLinksOnlyModeChanged); + } + } + } + + private final DateFormat dateFormat = new java.text.SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS Z", Locale.US); /* millisecond precision plus zone offset, so a stored date compares equal to the one it was written from */ + + private void createDataSetFolder(DataSetInformation info) + { + createLink(new File(info.targetFile, LINK_DIRECTORY), info.linkSource); + createModificationDateFile(info); + createMetaDataFile(info); + } + + private void createMetaDataFile(DataSetInformation info) + { + File file = new File(info.targetFile, "meta-data.tsv"); + String content = MetaDataBuilder.createMetaData(info.dto); + FileUtilities.writeToFile(file, content); + } + + private void createModificationDateFile(DataSetInformation info) + { + File file = new File(info.targetFile, "modification_timestamp"); + FileUtilities.writeToFile(file, dateFormat.format(info.dto.getModificationDate())); + } + + private Date getModificationDateFromFile(DataSetInformation info) + { + File file = new File(info.targetFile, "modification_timestamp"); + if (file.exists() == false) + return null; + String content = FileUtilities.loadToString(file); + try + { + return dateFormat.parse(content); + } catch (ParseException pe) + { + operationLog.error("Modification date of dataset stored in Hierarchical store in file " + file.getAbsolutePath() + " is corrupted"); + return null; + } + } + + private class DataSetInformation + { + /** + * The DTO object 
that should be linked + */ + AbstractExternalData dto; + + /** + * Path where the dataset metadata and link to store should be placed + */ + File targetFile; + + /** + * The location in dss store that should be linked + */ + File linkSource; + } + + private HashMap<String, AbstractExternalData> getAbstractExternalDataByCode(Collection<SimpleDataSetInformationDTO> dataSets) + { + List<String> codes = new ArrayList<>(); + for (SimpleDataSetInformationDTO dataSet : dataSets) + { + codes.add(dataSet.getDataSetCode()); + } + List<AbstractExternalData> listDataSetsByCode = openBISService.listDataSetsByCode(codes); + HashMap<String, AbstractExternalData> dataSetsByCode = new HashMap<>(); + for (AbstractExternalData abstractExternalData : listDataSetsByCode) + { + dataSetsByCode.put(abstractExternalData.getCode(), abstractExternalData); + } + return dataSetsByCode; } /** - * Extracts a {@link Map}: (target,source) from a collection of data sets. + * Collects a {@link DataSetInformation} (DTO, target path, link source) for every physical data set that has a link source */ - private Map<String, String> convertDataToLinkMappings() + private List<DataSetInformation> collectDataSet() { Collection<SimpleDataSetInformationDTO> dataSets = openBISService.listPhysicalDataSets(); - Map<String, String> linkMappings = new HashMap<String, String>(); + HashMap<String, AbstractExternalData> dataSetsByCode = getAbstractExternalDataByCode(dataSets); + ArrayList<DataSetInformation> linkMappings = new ArrayList<DataSetInformation>(); for (SimpleDataSetInformationDTO dataSet : dataSets) { + AbstractExternalData abstractData = dataSetsByCode.get(dataSet.getDataSetCode()); File targetFile = new File(hierarchyRoot, linkNamingStrategy.createHierarchicalPath(dataSet)); File share = new File(storeRoot, dataSet.getDataSetShareId()); File dataSetLocationRoot = new File(share, dataSet.getDataSetLocation()); File linkSource = determineLinkSource(dataSetLocationRoot, dataSet.getDataSetType()); - if (linkSource != null) - { - 
linkMappings.put(targetFile.getAbsolutePath(), linkSource.getAbsolutePath()); - } else + + DataSetInformation info = new DataSetInformation(); + info.dto = abstractData; + info.linkSource = linkSource; + info.targetFile = targetFile; + + if (linkSource == null) { String logMessage = @@ -225,6 +378,9 @@ public class HierarchicalStorageUpdater implements IDataStoreLockingMaintenanceT + "dataSetType='%s'. Link creation will be skipped.", dataSetLocationRoot, dataSet.getDataSetType()); operationLog.warn(logMessage); + } else + { + linkMappings.add(info); } } return linkMappings; @@ -275,31 +431,6 @@ public class HierarchicalStorageUpdater implements IDataStoreLockingMaintenanceT return linkSourceDescriptors.get(dataSetType); } - /** - * Removes from the <code>linkMappings</code> map all the elements with keys not belonging to <code>keep</code> set. - */ - private void removeUnnecessaryMappings(Map<String, String> linkMappings, Set<String> keep) - { - Set<String> keys = new HashSet<String>(linkMappings.keySet()); - for (String path : keys) - { - if (keep.contains(path) == false) - { - linkMappings.remove(path); - } - } - } - - /** - * Creates a new {@link Set} containing the elements that belong to both {@link Set}s. - */ - private Set<String> intersection(Set<String> setA, Set<String> setB) - { - Set<String> toBeUntouched = new HashSet<String>(setA); - toBeUntouched.retainAll(setB); - return toBeUntouched; - } - /** * Recursively removes from the file system files with paths defined in <code>toBeDeleted</code> {@link Set}. 
*/ @@ -373,7 +504,11 @@ public class HierarchicalStorageUpdater implements IDataStoreLockingMaintenanceT { if (isUnderHierarchyRoot(file)) { - return FileUtilities.isSymbolicLink(file) || file.isDirectory(); + // we try to be safe and delete only links and files that we know we created + return FileUtilities.isSymbolicLink(file) || + file.isDirectory() || + file.getName().equals("modification_timestamp") || + file.getName().equals("meta-data.tsv"); } else { operationLog.warn("Aborting an attempt to delete content outside of hierarchy root : " @@ -391,17 +526,19 @@ public class HierarchicalStorageUpdater implements IDataStoreLockingMaintenanceT } /** - * Creates the soft links for files with paths defined in <code>linkMappings</code> {@link Map}. + * Creates a soft link at <code>targetFile</code> pointing to <code>sourceFile</code>. If the link already exists it is deleted and recreated. If the source file is null or doesn't exist + * then a link is not created. */ - private void createLinksForChangedData(Map<String, String> linkMappings) + private void createLink(File targetFile, File sourceFile) { - for (String targetPath : linkMappings.keySet()) + if (targetFile.exists() || FileUtilities.isSymbolicLink(targetFile)) /* exists() follows links, so a dangling link needs the explicit check */ + { + targetFile.delete(); + } + targetFile.getParentFile().mkdirs(); + if (sourceFile != null && sourceFile.exists()) { - File targetDir = new File(targetPath); - String sourcePath = linkMappings.get(targetPath); - File sourceFile = new File(sourcePath); - targetDir.getParentFile().mkdirs(); - SoftLinkMaker.createSymbolicLink(sourceFile, targetDir); + SoftLinkMaker.createSymbolicLink(sourceFile.getAbsoluteFile(), targetFile.getAbsoluteFile()); } } diff --git a/datastore_server/source/java/ch/systemsx/cisd/etlserver/plugins/TemplateBasedLinkNamingStrategy.java b/datastore_server/source/java/ch/systemsx/cisd/etlserver/plugins/TemplateBasedLinkNamingStrategy.java index 63cf880824b5a050bd005b1009a51decb0b61b4f..67b449b846bf516fa4f24f5ea91d441c01ba5092 100644 --- 
a/datastore_server/source/java/ch/systemsx/cisd/etlserver/plugins/TemplateBasedLinkNamingStrategy.java +++ b/datastore_server/source/java/ch/systemsx/cisd/etlserver/plugins/TemplateBasedLinkNamingStrategy.java @@ -22,9 +22,11 @@ import java.util.Properties; import java.util.Set; import org.apache.commons.lang.StringUtils; +import org.apache.log4j.Logger; import ch.rinn.restrictions.Private; -import ch.systemsx.cisd.common.filesystem.FileUtilities; +import ch.systemsx.cisd.common.logging.LogCategory; +import ch.systemsx.cisd.common.logging.LogFactory; import ch.systemsx.cisd.common.properties.ExtendedProperties; import ch.systemsx.cisd.openbis.generic.shared.dto.SimpleDataSetInformationDTO; @@ -36,6 +38,9 @@ import ch.systemsx.cisd.openbis.generic.shared.dto.SimpleDataSetInformationDTO; public class TemplateBasedLinkNamingStrategy implements IHierarchicalStorageLinkNamingStrategy { + private static final Logger operationLog = + LogFactory.getLogger(LogCategory.OPERATION, TemplateBasedLinkNamingStrategy.class); + public static final String DEFAULT_LINK_TEMPLATE = "${space}/${project}/${experiment}/${dataSetType}+${sample}+${dataSet}"; @@ -93,7 +98,7 @@ public class TemplateBasedLinkNamingStrategy implements IHierarchicalStorageLink public Set<String> extractPaths(File root) { HashSet<String> set = new HashSet<String>(); - accumulateSymLinkPaths(set, root); + accumulatePaths(set, root, 3); return set; } @@ -105,20 +110,23 @@ public class TemplateBasedLinkNamingStrategy implements IHierarchicalStorageLink } @Private - static void accumulateSymLinkPaths(HashSet<String> paths, File dir) + static void accumulatePaths(HashSet<String> paths, File dir, int deepness) { File[] children = dir.listFiles(); if (children != null) { for (File child : children) { - if (FileUtilities.isSymbolicLink(child)) + if (deepness == 0) { String absolutePath = child.getAbsolutePath(); paths.add(absolutePath); } else if (child.isDirectory()) { - accumulateSymLinkPaths(paths, child); + 
accumulatePaths(paths, child, deepness - 1); + } else if (child.isFile()) + { + operationLog.warn("File in the Hierarchical store view at the unexpected depth " + child.getAbsolutePath()); } } } diff --git a/datastore_server/sourceTest/java/ch/systemsx/cisd/etlserver/plugins/HierarchicalStorageUpdaterTest.java b/datastore_server/sourceTest/java/ch/systemsx/cisd/etlserver/plugins/HierarchicalStorageUpdaterTest.java index af7a324eacf07237bfc7b879de191a880f785a6b..b460fed3ad74589ab9d6c4e41d913a4c0a62281a 100644 --- a/datastore_server/sourceTest/java/ch/systemsx/cisd/etlserver/plugins/HierarchicalStorageUpdaterTest.java +++ b/datastore_server/sourceTest/java/ch/systemsx/cisd/etlserver/plugins/HierarchicalStorageUpdaterTest.java @@ -20,6 +20,7 @@ import java.io.File; import java.io.FileFilter; import java.io.IOException; import java.util.ArrayList; +import java.util.Date; import java.util.List; import java.util.Properties; @@ -29,6 +30,7 @@ import org.jmock.Mockery; import org.springframework.beans.factory.BeanFactory; import org.testng.annotations.AfterMethod; import org.testng.annotations.BeforeMethod; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import ch.systemsx.cisd.base.tests.AbstractFileSystemTestCase; @@ -36,6 +38,15 @@ import ch.systemsx.cisd.common.filesystem.FileUtilities; import ch.systemsx.cisd.openbis.dss.generic.shared.IEncapsulatedOpenBISService; import ch.systemsx.cisd.openbis.dss.generic.shared.ServiceProviderTestWrapper; import ch.systemsx.cisd.openbis.dss.generic.shared.utils.DssPropertyParametersUtil; +import ch.systemsx.cisd.openbis.generic.shared.basic.dto.AbstractExternalData; +import ch.systemsx.cisd.openbis.generic.shared.basic.dto.DataSetType; +import ch.systemsx.cisd.openbis.generic.shared.basic.dto.Experiment; +import ch.systemsx.cisd.openbis.generic.shared.basic.dto.ExperimentType; +import ch.systemsx.cisd.openbis.generic.shared.basic.dto.PhysicalDataSet; +import 
ch.systemsx.cisd.openbis.generic.shared.basic.dto.Project; +import ch.systemsx.cisd.openbis.generic.shared.basic.dto.Sample; +import ch.systemsx.cisd.openbis.generic.shared.basic.dto.SampleType; +import ch.systemsx.cisd.openbis.generic.shared.basic.dto.Space; import ch.systemsx.cisd.openbis.generic.shared.dto.SimpleDataSetInformationDTO; /** @@ -69,6 +80,7 @@ public class HierarchicalStorageUpdaterTest extends AbstractFileSystemTestCase super(false); } + @SuppressWarnings("unchecked") @BeforeMethod public void setUpMocks() throws Exception { @@ -88,6 +100,9 @@ public class HierarchicalStorageUpdaterTest extends AbstractFileSystemTestCase allowing(openBISService).listPhysicalDataSets(); will(returnValue(listDataSets())); + + allowing(openBISService).listDataSetsByCode(with(any(List.class))); + will(returnValue(listAbstractDataSets())); } }); } @@ -98,40 +113,74 @@ public class HierarchicalStorageUpdaterTest extends AbstractFileSystemTestCase ServiceProviderTestWrapper.restoreApplicationContext(); } - @Test - public void testDataIsNotDeletedAfterReconfig() throws Exception + @DataProvider(name = "Configs") + protected Object[][] getConfigs() + { + return new Object[][] { + { true }, { false } }; + } + + @Test(dataProvider = "Configs") + public void testDataIsNotDeletedAfterReconfig(boolean linksOnly) throws Exception { - updater().execute(); + updater(linksOnly).execute(); // execute with different configuration and attempt to damage the data store - reconfiguredUpdater().execute(); + reconfiguredUpdater(linksOnly).execute(); assertDataStoreNotDamaged(); } - @Test - public void testBrokenLinksAreDeleted() throws Exception + @Test(dataProvider = "Configs") + public void testBrokenLinksAreDeleted(boolean linksOnly) throws Exception { - HierarchicalStorageUpdater storageUpdater = createUpdater(true); + HierarchicalStorageUpdater storageUpdater = createUpdater(true, linksOnly); storageUpdater.execute(); File shareRoot = new File(getStoreRoot(), SHARE_ID); File 
dataSetSource = new File(shareRoot, "ds2"); assertTrue(dataSetSource.isDirectory()); - File symboliLink = + File symbolicLink = new File(getHierarchyRoot().getAbsolutePath() + "/space/project/experiment/dataset-type+sample+ds2"); - assertTrue("Symbolic links should be created", FileUtilities.isSymbolicLink(symboliLink)); + if (linksOnly) + { + assertTrue("Symbolic link should be created", FileUtilities.isSymbolicLink(symbolicLink)); + } else + { + assertTrue("Directory should be created", symbolicLink.isDirectory()); + } FileUtilities.deleteRecursively(dataSetSource); storageUpdater.execute(); - assertTrue("Broken symlinks should be deleted", false == symboliLink.exists()); + assertTrue("Broken symlinks should be deleted", false == symbolicLink.exists()); + } + + @Test + public void testMetaDataCreated() throws Exception + { + HierarchicalStorageUpdater storageUpdater = createUpdater(true, false); + storageUpdater.execute(); + + File shareRoot = new File(getStoreRoot(), SHARE_ID); + File dataSetSource = new File(shareRoot, "ds2"); + assertTrue(dataSetSource.isDirectory()); + + File directory = + new File(getHierarchyRoot().getAbsolutePath() + + "/space/project/experiment/dataset-type+sample+ds2"); + assertTrue("Directory should be created", directory.isDirectory()); + + File metaDataFile = new File(directory, "meta-data.tsv"); + assertTrue("metadata files created", metaDataFile.exists()); + List<String> content = FileUtilities.loadToStringList(metaDataFile); + assertEquals("data_set\tcode\tds2", content.get(0)); } private void prepareDirectoryStructures() throws IOException @@ -164,17 +213,17 @@ public class HierarchicalStorageUpdaterTest extends AbstractFileSystemTestCase assertEquals(errMessage, templateSize, rootSize); } - private HierarchicalStorageUpdater updater() + private HierarchicalStorageUpdater updater(boolean linksOnly) { - return createUpdater(false); + return createUpdater(false, linksOnly); } - private HierarchicalStorageUpdater 
reconfiguredUpdater() + private HierarchicalStorageUpdater reconfiguredUpdater(boolean linksOnly) { - return createUpdater(true); + return createUpdater(true, linksOnly); } - private HierarchicalStorageUpdater createUpdater(boolean linkFromFirstChild) + private HierarchicalStorageUpdater createUpdater(boolean linkFromFirstChild, boolean onlyLinks) { final String pluginName = "hierarchical-storage-updater"; @@ -190,6 +239,8 @@ public class HierarchicalStorageUpdaterTest extends AbstractFileSystemTestCase "" + true); } + properties.put(HierarchicalStorageUpdater.LINKS_ONLY, onlyLinks ? "true" : "false"); + HierarchicalStorageUpdater updater = new HierarchicalStorageUpdater(); updater.setUp(pluginName, properties); return updater; @@ -214,4 +265,45 @@ public class HierarchicalStorageUpdaterTest extends AbstractFileSystemTestCase } return result; } + + List<AbstractExternalData> listAbstractDataSets() + { + final File shareRoot = new File(getStoreRoot(), SHARE_ID); + final List<AbstractExternalData> result = new ArrayList<>(); + for (File directory : FileUtilities.listDirectories(shareRoot, false)) + { + PhysicalDataSet dataset = new PhysicalDataSet(); + result.add(dataset); + dataset.setDataSetType(new DataSetType(DATASET_TYPE)); + dataset.setCode(directory.getName()); + dataset.setLocation(directory.getName()); + dataset.setShareId(SHARE_ID); + + Space space = new Space(); + space.setCode("space"); + Project project = new Project(); + project.setSpace(space); + Experiment experiment = new Experiment(); + experiment.setProject(project); + experiment.setCode("experiment"); + + ExperimentType experimentType = new ExperimentType(); + experimentType.setCode("experiment_type"); + experiment.setExperimentType(experimentType); + dataset.setExperiment(experiment); + + Sample sample = new Sample(); + sample.setCode("sample"); + sample.setSpace(space); + sample.setExperiment(experiment); + SampleType sampleType = new SampleType(); + sampleType.setCode("sample_type"); + 
sample.setSampleType(sampleType); + dataset.setSample(sample); + + dataset.setModificationDate(new Date()); + } + return result; + } + }