From 8bbf5877ffc08bb3e2730c633cbfc454f5eac93f Mon Sep 17 00:00:00 2001
From: kaloyane <kaloyane>
Date: Tue, 8 Nov 2011 14:38:38 +0000
Subject: [PATCH] [LMS-2481] compress dataset into HDF5 as a post-registration
 task

SVN: 23588
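
The task reads a "data-set-types" white list from its configuration (see
Hdf5CompressingPostRegistrationTask). A rough service.properties sketch for
wiring it up as a post-registration task follows; the surrounding
"post-registration.*" keys are assumptions about the usual DSS setup, not
part of this change, and HCS_IMAGE_RAW is a placeholder type code:

    post-registration.post-registration-tasks = hdf5-compression
    post-registration.hdf5-compression.class = \
        ch.systemsx.cisd.etlserver.postregistration.Hdf5CompressingPostRegistrationTask
    post-registration.hdf5-compression.data-set-types = HCS_IMAGE_RAW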
---
 .../cisd/common/filesystem/FileUtilities.java |  11 +-
 .../Hdf5CompressingPostRegistrationTask.java  | 499 ++++++++++++++++++
 .../server/EncapsulatedOpenBISService.java    |  11 +
 .../shared/IEncapsulatedOpenBISService.java   |  12 +
 .../shared/dto/DataSetInformation.java        |  14 +
 .../generic/server/AbstractServer.java        |  33 ++
 .../openbis/generic/server/CommonServer.java  |  36 +-
 .../openbis/generic/server/ETLService.java    |  18 +
 .../generic/server/ETLServiceLogger.java      |  14 +
 .../bo/datasetlister/DatasetLister.java       |   5 +-
 .../generic/shared/IETLLIMSService.java       |  27 +
 11 files changed, 643 insertions(+), 37 deletions(-)
 create mode 100644 datastore_server/source/java/ch/systemsx/cisd/etlserver/postregistration/Hdf5CompressingPostRegistrationTask.java

diff --git a/common/source/java/ch/systemsx/cisd/common/filesystem/FileUtilities.java b/common/source/java/ch/systemsx/cisd/common/filesystem/FileUtilities.java
index d14d458b488..162a31e0904 100644
--- a/common/source/java/ch/systemsx/cisd/common/filesystem/FileUtilities.java
+++ b/common/source/java/ch/systemsx/cisd/common/filesystem/FileUtilities.java
@@ -1220,11 +1220,20 @@ public final class FileUtilities
      * 
      * @returns <code>true</code> if and only if <var>file</var> has 'h5' or 'h5ar' as extension
      */
-    public final static boolean isHDF5ContainerFile(File file)
+    public final static boolean hasHDF5ContainerSuffix(File file)
     {
         return FilenameUtils.isExtension(file.getName().toLowerCase(), Arrays.asList("h5", "h5ar"));
     }
 
+    /**
+     * @returns <code>true</code> if and only if <var>file</var> is an existing normal file and has 'h5' or
+     *          'h5ar' as extension
+     */
+    public final static boolean isHDF5ContainerFile(File file)
+    {
+        return file.isFile() && hasHDF5ContainerSuffix(file);
+    }
+
     /**
      * Lists all resources in a given directory.
      * 
diff --git a/datastore_server/source/java/ch/systemsx/cisd/etlserver/postregistration/Hdf5CompressingPostRegistrationTask.java b/datastore_server/source/java/ch/systemsx/cisd/etlserver/postregistration/Hdf5CompressingPostRegistrationTask.java
new file mode 100644
index 00000000000..01d51aeb229
--- /dev/null
+++ b/datastore_server/source/java/ch/systemsx/cisd/etlserver/postregistration/Hdf5CompressingPostRegistrationTask.java
@@ -0,0 +1,499 @@
+/*
+ * Copyright 2011 ETH Zuerich, CISD
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package ch.systemsx.cisd.etlserver.postregistration;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Properties;
+import java.util.Set;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.log4j.Logger;
+
+import ch.systemsx.cisd.base.exceptions.CheckedExceptionTunnel;
+import ch.systemsx.cisd.common.exceptions.EnvironmentFailureException;
+import ch.systemsx.cisd.common.filesystem.FileUtilities;
+import ch.systemsx.cisd.common.hdf5.HDF5Container;
+import ch.systemsx.cisd.common.hdf5.HierarchicalStructureDuplicatorFileToHDF5;
+import ch.systemsx.cisd.common.io.hierarchical_content.api.IHierarchicalContent;
+import ch.systemsx.cisd.common.io.hierarchical_content.api.IHierarchicalContentNode;
+import ch.systemsx.cisd.common.logging.ISimpleLogger;
+import ch.systemsx.cisd.common.logging.LogCategory;
+import ch.systemsx.cisd.common.logging.LogFactory;
+import ch.systemsx.cisd.common.utilities.PropertyUtils;
+import ch.systemsx.cisd.openbis.dss.generic.shared.IEncapsulatedOpenBISService;
+import ch.systemsx.cisd.openbis.dss.generic.shared.ServiceProvider;
+import ch.systemsx.cisd.openbis.dss.generic.shared.dto.DataSetInformation;
+import ch.systemsx.cisd.openbis.generic.shared.basic.TechId;
+import ch.systemsx.cisd.openbis.generic.shared.basic.dto.Code;
+import ch.systemsx.cisd.openbis.generic.shared.basic.dto.ContainerDataSet;
+import ch.systemsx.cisd.openbis.generic.shared.basic.dto.DataSet;
+import ch.systemsx.cisd.openbis.generic.shared.basic.dto.ExternalData;
+import ch.systemsx.cisd.openbis.generic.shared.basic.dto.IEntityProperty;
+import ch.systemsx.cisd.openbis.generic.shared.dto.DataSetUpdatesDTO;
+import ch.systemsx.cisd.openbis.generic.shared.dto.NewExternalData;
+import ch.systemsx.cisd.openbis.generic.shared.dto.NewProperty;
+import ch.systemsx.cisd.openbis.generic.shared.dto.StorageFormat;
+import ch.systemsx.cisd.openbis.generic.shared.dto.identifier.ExperimentIdentifier;
+import ch.systemsx.cisd.openbis.generic.shared.dto.identifier.ExperimentIdentifierFactory;
+import ch.systemsx.cisd.openbis.generic.shared.dto.identifier.SampleIdentifier;
+import ch.systemsx.cisd.openbis.generic.shared.dto.identifier.SampleIdentifierFactory;
+
+/**
+ * A post-registration task replacing uncompressed datasets with new datasets which are HDF5
+ * compressed.
+ * 
+ * <pre>
+ * In order for a data set to be compressed, the following prerequisites must be met:
+ * 
+ * 1) The data set is part of a container. This is required because the contents of the new data set will 
+ * overshadow the existing data set in the container.
+ * 2) The data set type must be listed in the "data-set-types" configuration property.
+ * 3) The data set must contain a folder whose name ends with ".h5" (or ".h5ar"), i.e. instead of 
+ * "original" the root directory with contents must be named "original.h5". This requirement ensures
+ * that no links in the UI are broken when the non-compressed data set gets deleted.
+ * 
+ * </pre>
+ * 
+ * @author Kaloyan Enimanev
+ */
+public class Hdf5CompressingPostRegistrationTask extends AbstractPostRegistrationTask
+{
+    private static final String DATA_SET_TYPES = "data-set-types";
+
+    private static final String HDF5_COMPRESSION_CLEANUP_MARKERS_DIRNAME = "hdf5-cleanup-markers";
+
+    private static final Logger operationLog = LogFactory.getLogger(LogCategory.OPERATION,
+            Hdf5CompressingPostRegistrationTask.class);
+
+    /**
+     * White list of the data set types to be compressed.
+     */
+    private final Set<String> processedDataSetTypes;
+    
+    public Hdf5CompressingPostRegistrationTask(Properties properties, IEncapsulatedOpenBISService service)
+    {
+        super(properties, service);
+        processedDataSetTypes =
+                new HashSet<String>(PropertyUtils.tryGetList(properties, DATA_SET_TYPES));
+    }
+
+    /**
+     * Do not allow maintenance tasks that alter the data store contents to run concurrently with this task.
+     */
+    public boolean requiresDataStoreLock()
+    {
+        return true;
+    }
+
+    public IPostRegistrationTaskExecutor createExecutor(String dataSetCode, boolean container)
+    {
+        if (container)
+        {
+            return DummyPostRegistrationTaskExecutor.INSTANCE;
+        }
+        return new Executor(dataSetCode);
+    }
+
+    private final class Executor implements IPostRegistrationTaskExecutor
+    {
+        private final String dataSetCode;
+
+        Executor(String dataSetCode)
+        {
+            this.dataSetCode = dataSetCode;
+        }
+
+        public void execute()
+        {
+
+            ExternalData externalData = tryGetDataSet(dataSetCode, service);
+            if (false == shouldCompressToHdf5(externalData))
+            {
+                return;
+            }
+            
+            IHierarchicalContent hierarchicalContent =
+                ServiceProvider.getHierarchicalContentProvider().asContent(externalData);
+            try
+            {
+                if (false == hasFoldersForCompressing(hierarchicalContent)) {
+                    operationLog.info(String.format(
+                            " Data set '%s' meets the criterion for HDF5 compression "
+                                    + "in post registration, but it contains no folder named "
+                                    + "'*.h5'. HDF5 compression will be skipped...", dataSetCode));
+                    return;
+                }
+                String hdf5DataSetCode = service.createDataSetCode();
+                DataSet dataSet = (DataSet) externalData;
+                File hdf5DataSetDir =
+                        createNewDataSetDirectory(hierarchicalContent, hdf5DataSetCode);
+                File cleanupMarker = createCleanupMarker(dataSet.getShareId(), hdf5DataSetDir);
+                createCompressedDuplicate(hdf5DataSetDir, hierarchicalContent);
+                registerTwinDataset(hdf5DataSetCode, dataSet);
+                removeOldDataSet(dataSetCode, "Replaced by '" + hdf5DataSetCode + "'");
+                cleanupMarker.delete();
+            } finally
+            {
+                hierarchicalContent.close();
+            }
+        }
+
+        private File createCleanupMarker(String shareId, File hdf5DataSetDir)
+        {
+            File markerFile = getCleanupMarkerFile(dataSetCode, shareId);
+            final File markerDir = markerFile.getParentFile();
+            markerDir.mkdirs();
+            if (false == markerDir.exists())
+            {
+                throw new EnvironmentFailureException(
+                        "Cannot created HDF5 compression marker directory ");
+            }
+
+            FileUtilities.writeToFile(markerFile, hdf5DataSetDir.getAbsolutePath());
+            return markerFile;
+        }
+
+        private void removeOldDataSet(String dataSetToDelete, String reason)
+        {
+            service.removeDataSetsPermanently(Collections.singletonList(dataSetToDelete), reason);
+        }
+
+        private void createCompressedDuplicate(File stagingDir,
+                IHierarchicalContent hierarchicalContent)
+        {
+            IHierarchicalContentNode root = hierarchicalContent.getRootNode();
+            for (IHierarchicalContentNode child : root.getChildNodes())
+            {
+                File fileOrFolder = child.getFile();
+                if (shouldBeCompressed(fileOrFolder))
+                {
+                    File h5ContainerFile = new File(stagingDir, fileOrFolder.getName());
+                    HDF5Container container = new HDF5Container(h5ContainerFile);
+                    container.runWriterClient(true,
+                            new HierarchicalStructureDuplicatorFileToHDF5.DuplicatorWriterClient(
+                                    fileOrFolder));
+                } else
+                {
+                    copy(fileOrFolder, stagingDir);
+                }
+            }
+
+        }
+
+        private void copy(File fileOrFolder, File toDir)
+        {
+            try
+            {
+                if (fileOrFolder.isFile())
+                {
+                    FileUtils.copyFileToDirectory(fileOrFolder, toDir);
+                } else
+                {
+                    FileUtils.copyDirectoryToDirectory(fileOrFolder, toDir);
+                }
+            } catch (IOException ioex)
+            {
+                throw CheckedExceptionTunnel.wrapIfNecessary(ioex);
+            }
+        }
+
+        private File createNewDataSetDirectory(IHierarchicalContent hierarchicalContent,
+                String hdf5DataSetCode)
+        {
+            File existingDataSetRoot = hierarchicalContent.getRootNode().getFile();
+            File newDataSetRoot = new File(existingDataSetRoot.getParent(), hdf5DataSetCode);
+
+            if (false == newDataSetRoot.mkdirs())
+            {
+                throw new EnvironmentFailureException(
+                        "Cannot create folder for HDF5 compression data set - '" + newDataSetRoot
+                                + "'");
+            }
+            return newDataSetRoot;
+        }
+
+        private boolean shouldBeCompressed(File file)
+        {
+            return file.isDirectory() && FileUtilities.hasHDF5ContainerSuffix(file);
+        }
+
+        private boolean hasFoldersForCompressing(IHierarchicalContent hierarchicalContent)
+        {
+            IHierarchicalContentNode root = hierarchicalContent.getRootNode();
+            for (IHierarchicalContentNode child : root.getChildNodes())
+            {
+                if (shouldBeCompressed(child.getFile()))
+                {
+                    return true;
+                }
+            }
+            return false;
+        }
+
+        private boolean shouldCompressToHdf5(ExternalData dataSet)
+        {
+            if (dataSet == null)
+            {
+                operationLog.warn("Data set '" + dataSetCode
+                        + "' is no longer available in openBIS."
+                        + "Archiving post-registration task will be skipped...");
+                return false;
+            }
+            if (dataSet.tryGetContainer() == null)
+            {
+                operationLog.info("Data set '" + dataSetCode + "' is not part of a container."
+                        + "Compression to HDF5 will be skipped...");
+                return false;
+            }
+
+            final String dataSetTypeCode = dataSet.getDataSetType().getCode();
+            if (false == processedDataSetTypes.contains(dataSetTypeCode))
+            {
+                operationLog.debug(String.format(
+                                "Data set type '%s' is not configured for HDF5 compressing. Skipping "
+                                        + "compression for data set '%s'...", dataSetTypeCode,
+                                dataSetCode));
+                return false;
+            }
+            return true;
+        }
+
+        public ICleanupTask createCleanupTask()
+        {
+            return new Hdf5CompressingCleanupTask(dataSetCode);
+        }
+
+        private void registerTwinDataset(String hdf5DataSetCode, DataSet protoDataSet)
+        {
+
+            DataSetInformation dataSetInformation = createDataSetInformation(protoDataSet);
+            NewExternalData twinExternalData = createTwin(hdf5DataSetCode, protoDataSet);
+
+            service.registerDataSet(dataSetInformation, twinExternalData);
+            ContainerDataSet container = protoDataSet.tryGetContainer();
+            DataSetUpdatesDTO containerUpdate = addNewContainedDataSet(container, hdf5DataSetCode);
+            service.updateDataSet(containerUpdate);
+        }
+
+        private DataSetInformation createDataSetInformation(DataSet protoDataSet)
+        {
+            DataSetInformation result = new DataSetInformation();
+            result.setExperimentIdentifier(extractExperimentIdentifier(protoDataSet));
+            SampleIdentifier sampleIdentifier = extractSampleIdentifier(protoDataSet);
+            if (sampleIdentifier != null)
+            {
+                result.setSampleIdentifier(sampleIdentifier);
+            }
+            return result;
+        }
+
+        private DataSetUpdatesDTO addNewContainedDataSet(ContainerDataSet container,
+                String hdf5DataSetCode)
+        {
+            DataSetUpdatesDTO updatesDTO = new DataSetUpdatesDTO();
+            updatesDTO.setVersion(container.getModificationDate());
+
+            updatesDTO.setDatasetId(new TechId(container.getId()));
+            updatesDTO.setProperties(container.getProperties());
+            updatesDTO.setExperimentIdentifierOrNull(extractExperimentIdentifier(container));
+            updatesDTO.setSampleIdentifierOrNull(extractSampleIdentifier(container));
+            List<String> newContainedCodes = Code.extractCodes(container.getContainedDataSets());
+            // the new data set will shadow the contents of the existing one in the container
+            int hdf5DataSetIndex = Math.max(0, newContainedCodes.indexOf(dataSetCode));
+            newContainedCodes.add(hdf5DataSetIndex, hdf5DataSetCode);
+            updatesDTO.setModifiedContainedDatasetCodesOrNull(newContainedCodes
+                    .toArray(new String[0]));
+            return updatesDTO;
+
+        }
+
+        private NewExternalData createTwin(String hdf5DataSetCode, DataSet protoDataSet)
+        {
+            NewExternalData externalData = new NewExternalData();
+            externalData.setDataProducerCode(protoDataSet.getDataProducerCode());
+            externalData.setDataSetProperties(extractProperties(protoDataSet));
+            externalData.setDataSetType(protoDataSet.getDataSetType());
+            externalData.setDataStoreCode(protoDataSet.getDataStore().getCode());
+            externalData.setExperimentIdentifierOrNull(extractExperimentIdentifier(protoDataSet));
+            externalData.setMeasured(protoDataSet.isDerived() == false);
+            externalData.setParentDataSetCodes(Code.extractCodes(protoDataSet.getParents()));
+            // TODO KE: no API way to set children data sets
+            externalData.setProductionDate(protoDataSet.getProductionDate());
+            externalData.setRegistrationDate(protoDataSet.getRegistrationDate());
+            externalData.setSampleIdentifierOrNull(extractSampleIdentifier(protoDataSet));
+            externalData.setFileFormatType(protoDataSet.getFileFormatType());
+            externalData.setLocation(protoDataSet.getLocation());
+            externalData.setLocatorType(protoDataSet.getLocatorType());
+            externalData.setShareId(protoDataSet.getShareId());
+            externalData.setSpeedHint(protoDataSet.getSpeedHint());
+            externalData.setStorageFormat(StorageFormat.PROPRIETARY);
+
+            externalData.setCode(hdf5DataSetCode);
+            final File protoDataSetLocation = new File(protoDataSet.getLocation());
+            final String newDataSetLocation =
+                    new File(protoDataSetLocation.getParentFile(), hdf5DataSetCode).getPath();
+            externalData.setLocation(newDataSetLocation);
+
+            return externalData;
+        }
+
+        private List<NewProperty> extractProperties(DataSet protoDataSet)
+        {
+            ArrayList<NewProperty> newProperties = new ArrayList<NewProperty>();
+            for (IEntityProperty prop : protoDataSet.getProperties())
+            {
+                NewProperty newProp =
+                        new NewProperty(prop.getPropertyType().getCode(), prop.tryGetAsString());
+                newProperties.add(newProp);
+            }
+            return newProperties;
+        }
+    }
+
+    static ExternalData tryGetDataSet(String dataSetCode,
+            IEncapsulatedOpenBISService service)
+    {
+        List<String> codeAsList = Collections.singletonList(dataSetCode);
+        List<ExternalData> dataList = service.listDataSetsByCode(codeAsList);
+        if (dataList == null || dataList.isEmpty())
+        {
+            return null;
+        }
+
+        return dataList.get(0);
+    }
+
+    private ExperimentIdentifier extractExperimentIdentifier(ExternalData data)
+    {
+        return ExperimentIdentifierFactory.parse(data.getExperiment().getIdentifier());
+    }
+
+    private SampleIdentifier extractSampleIdentifier(ExternalData data)
+    {
+        if (data.getSampleIdentifier() != null)
+        {
+            return SampleIdentifierFactory.parse(data.getSampleIdentifier());
+
+        } else
+        {
+            return null;
+        }
+    }
+
+    private static File getCleanupMarkerFile(String dataSetCode, String shareId)
+    {
+        File storeRoot = ServiceProvider.getConfigProvider().getStoreRoot();
+        File shareRoot = new File(storeRoot, shareId);
+        File hdf5CompressionMarkers = new File(shareRoot, HDF5_COMPRESSION_CLEANUP_MARKERS_DIRNAME);
+        return new File(hdf5CompressionMarkers, dataSetCode);
+    }
+
+    private static class Hdf5CompressingCleanupTask implements ICleanupTask
+    {
+        private static final long serialVersionUID = 1L;
+
+        private final String dataSetCode;
+
+        Hdf5CompressingCleanupTask(String dataSetCode)
+        {
+            this.dataSetCode = dataSetCode;
+        }
+
+        public void cleanup(ISimpleLogger logger)
+        {
+            DataSet dataSet =
+                    (DataSet) tryGetDataSet(dataSetCode, ServiceProvider.getOpenBISService());
+            if (dataSet != null)
+            {
+                File cleanupMarkerFile = getCleanupMarkerFile(dataSetCode, dataSet.getShareId());
+                if (cleanupMarkerFile.exists())
+                {
+                    cleanup(dataSet, cleanupMarkerFile);
+                    cleanupMarkerFile.delete();
+                }
+            }
+        }
+
+        private void cleanup(DataSet dataSet, File cleanupMarkerFile)
+        {
+            String danglingHdf5DirName = FileUtilities.loadToString(cleanupMarkerFile).trim();
+            File danglingDataSetDir = new File(danglingHdf5DirName);
+            if (danglingDataSetDir.exists())
+            {
+                Collection<String> danglingDirContents =
+                        getDanglingDirContentsAsSet(danglingDataSetDir);
+                Collection<String> dataSetContents = getDataSetContents(dataSet);
+                if (dataSetContents.containsAll(danglingDirContents))
+                {
+                    // dangling dir has known contents, so we can proceed with the deletion
+                    operationLog.info("Deleting dangling HDF5 compression folder "
+                            + danglingHdf5DirName);
+                    FileUtilities.deleteRecursively(danglingDataSetDir);
+                } else
+                {
+                    // marker file points to a directory which has files not present in the
+                    // original data set. we'll skip deletion
+                    operationLog
+                            .error(String
+                                    .format("Unexpected set of contents '%s' in dangling directory '%s'. "
+                                            + "Only files with the following names '%s' are expected. Deletion of '%s' will be skipped.",
+                                            danglingDirContents, danglingHdf5DirName,
+                                            dataSetContents, danglingHdf5DirName));
+                }
+            }
+        }
+
+        private Collection<String> getDanglingDirContentsAsSet(File danglingDataSetDir)
+        {
+            List<File> files = FileUtilities.listFilesAndDirectories(danglingDataSetDir, false);
+            List<String> result = new ArrayList<String>();
+            for (File f : files)
+            {
+                result.add(f.getName());
+            }
+            return result;
+        }
+
+        private Collection<String> getDataSetContents(DataSet dataSet)
+        {
+            IHierarchicalContent hierarchicalContent =
+                    ServiceProvider.getHierarchicalContentProvider().asContent(dataSet.getCode());
+            List<String> result = new ArrayList<String>();
+            try
+            {
+                for (IHierarchicalContentNode node : hierarchicalContent.getRootNode()
+                        .getChildNodes())
+                {
+                    result.add(node.getName());
+                }
+            } finally
+            {
+                hierarchicalContent.close();
+            }
+
+            return result;
+        }
+
+    }
+
+}
diff --git a/datastore_server/source/java/ch/systemsx/cisd/openbis/dss/generic/server/EncapsulatedOpenBISService.java b/datastore_server/source/java/ch/systemsx/cisd/openbis/dss/generic/server/EncapsulatedOpenBISService.java
index 6e00611ce8c..48cc9d6d61d 100644
--- a/datastore_server/source/java/ch/systemsx/cisd/openbis/dss/generic/server/EncapsulatedOpenBISService.java
+++ b/datastore_server/source/java/ch/systemsx/cisd/openbis/dss/generic/server/EncapsulatedOpenBISService.java
@@ -65,6 +65,7 @@ import ch.systemsx.cisd.openbis.generic.shared.basic.dto.VocabularyTerm;
 import ch.systemsx.cisd.openbis.generic.shared.dto.AtomicEntityOperationDetails;
 import ch.systemsx.cisd.openbis.generic.shared.dto.AtomicEntityOperationResult;
 import ch.systemsx.cisd.openbis.generic.shared.dto.DataSetShareId;
+import ch.systemsx.cisd.openbis.generic.shared.dto.DataSetUpdatesDTO;
 import ch.systemsx.cisd.openbis.generic.shared.dto.ListSamplesByPropertyCriteria;
 import ch.systemsx.cisd.openbis.generic.shared.dto.NewExternalData;
 import ch.systemsx.cisd.openbis.generic.shared.dto.NewProperty;
@@ -594,4 +595,14 @@ public final class EncapsulatedOpenBISService implements IEncapsulatedOpenBISSer
         return service.listMaterials(session.getToken(), criteria, withProperties);
     }
 
+    public void removeDataSetsPermanently(List<String> dataSetCodes, String reason)
+    {
+        service.removeDataSetsPermanently(session.getToken(), dataSetCodes, reason);
+    }
+
+    public void updateDataSet(DataSetUpdatesDTO dataSetUpdates)
+    {
+        service.updateDataSet(session.getToken(), dataSetUpdates);
+    }
+
 }
\ No newline at end of file
diff --git a/datastore_server/source/java/ch/systemsx/cisd/openbis/dss/generic/shared/IEncapsulatedOpenBISService.java b/datastore_server/source/java/ch/systemsx/cisd/openbis/dss/generic/shared/IEncapsulatedOpenBISService.java
index 3c467b90a60..563b4c85d68 100644
--- a/datastore_server/source/java/ch/systemsx/cisd/openbis/dss/generic/shared/IEncapsulatedOpenBISService.java
+++ b/datastore_server/source/java/ch/systemsx/cisd/openbis/dss/generic/shared/IEncapsulatedOpenBISService.java
@@ -50,6 +50,7 @@ import ch.systemsx.cisd.openbis.generic.shared.basic.dto.VocabularyTerm;
 import ch.systemsx.cisd.openbis.generic.shared.dto.AtomicEntityOperationDetails;
 import ch.systemsx.cisd.openbis.generic.shared.dto.AtomicEntityOperationResult;
 import ch.systemsx.cisd.openbis.generic.shared.dto.DataSetShareId;
+import ch.systemsx.cisd.openbis.generic.shared.dto.DataSetUpdatesDTO;
 import ch.systemsx.cisd.openbis.generic.shared.dto.ListSamplesByPropertyCriteria;
 import ch.systemsx.cisd.openbis.generic.shared.dto.NewExternalData;
 import ch.systemsx.cisd.openbis.generic.shared.dto.NewProperty;
@@ -470,4 +471,15 @@ public interface IEncapsulatedOpenBISService
     @ManagedAuthentication
     public List<Experiment> listExperiments(ProjectIdentifier projectIdentifier);
 
+    /**
+     * {@link IETLLIMSService#removeDataSetsPermanently(String, List, String)}
+     */
+    @ManagedAuthentication
+    public void removeDataSetsPermanently(List<String> dataSetCodes, String reason);
+
+    /**
+     * {@link IETLLIMSService#updateDataSet(String, DataSetUpdatesDTO)}
+     */
+    @ManagedAuthentication
+    public void updateDataSet(DataSetUpdatesDTO dataSetUpdates);
 }
\ No newline at end of file
diff --git a/datastore_server/source/java/ch/systemsx/cisd/openbis/dss/generic/shared/dto/DataSetInformation.java b/datastore_server/source/java/ch/systemsx/cisd/openbis/dss/generic/shared/dto/DataSetInformation.java
index 9689448d8c5..e1a87c5d472 100644
--- a/datastore_server/source/java/ch/systemsx/cisd/openbis/dss/generic/shared/dto/DataSetInformation.java
+++ b/datastore_server/source/java/ch/systemsx/cisd/openbis/dss/generic/shared/dto/DataSetInformation.java
@@ -261,6 +261,20 @@ public class DataSetInformation implements Serializable
                 sampleCode);
     }
 
+    /**
+     * Sets the sample identifier.
+     */
+    public final void setSampleIdentifier(SampleIdentifier sampleIdentifier)
+    {
+        setSampleCode(sampleIdentifier.getSampleCode());
+        final SpaceIdentifier spaceLevel = sampleIdentifier.getSpaceLevel();
+        if (spaceLevel != null)
+        {
+            setSpaceCode(spaceLevel.getSpaceCode());
+            setInstanceCode(spaceLevel.getDatabaseInstanceCode());
+        }
+    }
+
     public final String getSampleCode()
     {
         return sampleCode;
diff --git a/openbis/source/java/ch/systemsx/cisd/openbis/generic/server/AbstractServer.java b/openbis/source/java/ch/systemsx/cisd/openbis/generic/server/AbstractServer.java
index 11a19af0e2b..62b9fba3772 100644
--- a/openbis/source/java/ch/systemsx/cisd/openbis/generic/server/AbstractServer.java
+++ b/openbis/source/java/ch/systemsx/cisd/openbis/generic/server/AbstractServer.java
@@ -22,7 +22,9 @@ import java.util.Collections;
 import java.util.Date;
 import java.util.HashSet;
 import java.util.Iterator;
+import java.util.LinkedHashMap;
 import java.util.List;
+import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.LinkedBlockingQueue;
 import java.util.concurrent.ThreadPoolExecutor;
@@ -43,6 +45,7 @@ import ch.systemsx.cisd.common.mail.MailClientParameters;
 import ch.systemsx.cisd.common.spring.AbstractServiceWithLogger;
 import ch.systemsx.cisd.openbis.generic.server.business.IPropertiesBatchManager;
 import ch.systemsx.cisd.openbis.generic.server.business.PropertiesBatchManager;
+import ch.systemsx.cisd.openbis.generic.server.business.bo.IDataSetTable;
 import ch.systemsx.cisd.openbis.generic.server.dataaccess.IDAOFactory;
 import ch.systemsx.cisd.openbis.generic.server.plugin.DataSetServerPluginRegistry;
 import ch.systemsx.cisd.openbis.generic.server.plugin.IDataSetTypeSlaveServerPlugin;
@@ -62,6 +65,7 @@ import ch.systemsx.cisd.openbis.generic.shared.basic.dto.NewSample;
 import ch.systemsx.cisd.openbis.generic.shared.basic.dto.NewSamplesWithTypes;
 import ch.systemsx.cisd.openbis.generic.shared.basic.dto.RoleWithHierarchy.RoleCode;
 import ch.systemsx.cisd.openbis.generic.shared.basic.dto.SampleType;
+import ch.systemsx.cisd.openbis.generic.shared.dto.DataPE;
 import ch.systemsx.cisd.openbis.generic.shared.dto.DataSetTypePE;
 import ch.systemsx.cisd.openbis.generic.shared.dto.DataStorePE;
 import ch.systemsx.cisd.openbis.generic.shared.dto.GridCustomColumnPE;
@@ -211,6 +215,35 @@ public abstract class AbstractServer<T> extends AbstractServiceWithLogger<T> imp
                 .getSlaveServer();
     }
 
+    /** @deprecated this is legacy code permanently deleting data sets one by one omitting trash */
+    @Deprecated
+    protected void permanentlyDeleteDataSets(Session session, IDataSetTable dataSetTable,
+            List<String> dataSetCodes, String reason, boolean force)
+    {
+        // TODO 2011-06-21, Piotr Buczek: loading less for deletion would probably be faster
+        dataSetTable.loadByDataSetCodes(dataSetCodes, false, false);
+        List<DataPE> dataSets = dataSetTable.getDataSets();
+        Map<DataSetTypePE, List<DataPE>> groupedDataSets =
+                new LinkedHashMap<DataSetTypePE, List<DataPE>>();
+        for (DataPE dataSet : dataSets)
+        {
+            DataSetTypePE dataSetType = dataSet.getDataSetType();
+            List<DataPE> list = groupedDataSets.get(dataSetType);
+            if (list == null)
+            {
+                list = new ArrayList<DataPE>();
+                groupedDataSets.put(dataSetType, list);
+            }
+            list.add(dataSet);
+        }
+        for (Map.Entry<DataSetTypePE, List<DataPE>> entry : groupedDataSets.entrySet())
+        {
+            DataSetTypePE dataSetType = entry.getKey();
+            IDataSetTypeSlaveServerPlugin plugin = getDataSetTypeSlaveServerPlugin(dataSetType);
+            plugin.permanentlyDeleteDataSets(session, entry.getValue(), reason, force);
+        }
+    }
+
     private final RoleAssignmentPE createRoleAssigment(final PersonPE registrator,
             final PersonPE person, final RoleCode roleCode)
     {
diff --git a/openbis/source/java/ch/systemsx/cisd/openbis/generic/server/CommonServer.java b/openbis/source/java/ch/systemsx/cisd/openbis/generic/server/CommonServer.java
index 28064eedd6e..952d65e00cd 100644
--- a/openbis/source/java/ch/systemsx/cisd/openbis/generic/server/CommonServer.java
+++ b/openbis/source/java/ch/systemsx/cisd/openbis/generic/server/CommonServer.java
@@ -23,7 +23,6 @@ import java.util.Collections;
 import java.util.Comparator;
 import java.util.Date;
 import java.util.HashMap;
-import java.util.LinkedHashMap;
 import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
@@ -85,7 +84,6 @@ import ch.systemsx.cisd.openbis.generic.server.dataaccess.IRoleAssignmentDAO;
 import ch.systemsx.cisd.openbis.generic.server.dataaccess.db.HibernateSearchDataProvider;
 import ch.systemsx.cisd.openbis.generic.server.jython.api.v1.impl.EncapsulatedCommonServer;
 import ch.systemsx.cisd.openbis.generic.server.jython.api.v1.impl.MasterDataRegistrationScriptRunner;
-import ch.systemsx.cisd.openbis.generic.server.plugin.IDataSetTypeSlaveServerPlugin;
 import ch.systemsx.cisd.openbis.generic.server.util.GroupIdentifierHelper;
 import ch.systemsx.cisd.openbis.generic.shared.basic.BasicEntityInformationHolder;
 import ch.systemsx.cisd.openbis.generic.shared.basic.CodeConverter;
@@ -1166,6 +1164,7 @@ public final class CommonServer extends AbstractCommonServer<ICommonServerForInt
         Session session = getSession(sessionToken);
         // NOTE: logical deletion and new implementation of permanent deletion doesn't use
         // IDataSetTypeSlaveServerPlugin (we have just 1 implementation!)
+        final IDataSetTable dataSetTable = businessObjectFactory.createDataSetTable(session);
         switch (deletionType)
         {
             case PERMANENT:
@@ -1177,11 +1176,10 @@ public final class CommonServer extends AbstractCommonServer<ICommonServerForInt
                     deletedDataSetTable.permanentlyDeleteLoadedDataSets(reason, force);
                 } else
                 {
-                    permanentlyDeleteDataSets(session, dataSetCodes, reason, force);
+                    permanentlyDeleteDataSets(session, dataSetTable, dataSetCodes, reason, force);
                 }
                 break;
             case TRASH:
-                IDataSetTable dataSetTable = businessObjectFactory.createDataSetTable(session);
                 dataSetTable.loadByDataSetCodes(dataSetCodes, false, false);
                 List<DataPE> dataSets = dataSetTable.getDataSets();
                 ITrashBO trashBO = businessObjectFactory.createTrashBO(session);
@@ -1191,36 +1189,6 @@ public final class CommonServer extends AbstractCommonServer<ICommonServerForInt
         }
     }
 
-    @Deprecated
-    /** @deprecated this is legacy code permanently deleting data sets one by one omitting trash */
-    private void permanentlyDeleteDataSets(Session session, List<String> dataSetCodes,
-            String reason, boolean force)
-    {
-        IDataSetTable dataSetTable = businessObjectFactory.createDataSetTable(session);
-        // TODO 2011-06-21, Piotr Buczek: loading less for deletion would probably be faster
-        dataSetTable.loadByDataSetCodes(dataSetCodes, false, false);
-        List<DataPE> dataSets = dataSetTable.getDataSets();
-        Map<DataSetTypePE, List<DataPE>> groupedDataSets =
-                new LinkedHashMap<DataSetTypePE, List<DataPE>>();
-        for (DataPE dataSet : dataSets)
-        {
-            DataSetTypePE dataSetType = dataSet.getDataSetType();
-            List<DataPE> list = groupedDataSets.get(dataSetType);
-            if (list == null)
-            {
-                list = new ArrayList<DataPE>();
-                groupedDataSets.put(dataSetType, list);
-            }
-            list.add(dataSet);
-        }
-        for (Map.Entry<DataSetTypePE, List<DataPE>> entry : groupedDataSets.entrySet())
-        {
-            DataSetTypePE dataSetType = entry.getKey();
-            IDataSetTypeSlaveServerPlugin plugin = getDataSetTypeSlaveServerPlugin(dataSetType);
-            plugin.permanentlyDeleteDataSets(session, entry.getValue(), reason, force);
-        }
-    }
-
     public void deleteSamples(String sessionToken, List<TechId> sampleIds, String reason,
             DeletionType deletionType)
     {
diff --git a/openbis/source/java/ch/systemsx/cisd/openbis/generic/server/ETLService.java b/openbis/source/java/ch/systemsx/cisd/openbis/generic/server/ETLService.java
index d70907fa482..608b2e2189a 100644
--- a/openbis/source/java/ch/systemsx/cisd/openbis/generic/server/ETLService.java
+++ b/openbis/source/java/ch/systemsx/cisd/openbis/generic/server/ETLService.java
@@ -40,6 +40,7 @@ import ch.systemsx.cisd.openbis.generic.server.business.IDataStoreServiceFactory
 import ch.systemsx.cisd.openbis.generic.server.business.IPropertiesBatchManager;
 import ch.systemsx.cisd.openbis.generic.server.business.bo.ICommonBusinessObjectFactory;
 import ch.systemsx.cisd.openbis.generic.server.business.bo.IDataBO;
+import ch.systemsx.cisd.openbis.generic.server.business.bo.IDataSetTable;
 import ch.systemsx.cisd.openbis.generic.server.business.bo.IExperimentBO;
 import ch.systemsx.cisd.openbis.generic.server.business.bo.IExperimentTable;
 import ch.systemsx.cisd.openbis.generic.server.business.bo.IGroupBO;
@@ -1473,4 +1474,21 @@ public class ETLService extends AbstractCommonServer<IETLLIMSService> implements
         return criteria;
 
     }
+
+    @SuppressWarnings("deprecation")
+    public void removeDataSetsPermanently(String sessionToken, List<String> dataSetCodes,
+            String reason)
+    {
+        Session session = getSession(sessionToken);
+        IDataSetTable dataSetTable = businessObjectFactory.createDataSetTable(session);
+        permanentlyDeleteDataSets(session, dataSetTable, dataSetCodes, reason, true);
+    }
+
+    public void updateDataSet(String sessionToken, DataSetUpdatesDTO dataSetUpdates)
+    {
+        final Session session = getSession(sessionToken);
+        final IDataBO dataSetBO = businessObjectFactory.createDataBO(session);
+        dataSetBO.update(dataSetUpdates);
+    }
+
 }
diff --git a/openbis/source/java/ch/systemsx/cisd/openbis/generic/server/ETLServiceLogger.java b/openbis/source/java/ch/systemsx/cisd/openbis/generic/server/ETLServiceLogger.java
index d4507f0cfa4..811304dcb2e 100644
--- a/openbis/source/java/ch/systemsx/cisd/openbis/generic/server/ETLServiceLogger.java
+++ b/openbis/source/java/ch/systemsx/cisd/openbis/generic/server/ETLServiceLogger.java
@@ -23,6 +23,7 @@ import java.util.List;
 import org.apache.log4j.Level;
 
 import ch.systemsx.cisd.authentication.ISessionManager;
+import ch.systemsx.cisd.common.collections.CollectionUtils;
 import ch.systemsx.cisd.common.exceptions.UserFailureException;
 import ch.systemsx.cisd.common.spring.IInvocationLoggerContext;
 import ch.systemsx.cisd.openbis.generic.shared.AbstractServerLogger;
@@ -55,6 +56,7 @@ import ch.systemsx.cisd.openbis.generic.shared.basic.dto.VocabularyTerm;
 import ch.systemsx.cisd.openbis.generic.shared.dto.AtomicEntityOperationDetails;
 import ch.systemsx.cisd.openbis.generic.shared.dto.AtomicEntityOperationResult;
 import ch.systemsx.cisd.openbis.generic.shared.dto.DataSetShareId;
+import ch.systemsx.cisd.openbis.generic.shared.dto.DataSetUpdatesDTO;
 import ch.systemsx.cisd.openbis.generic.shared.dto.DataStoreServerInfo;
 import ch.systemsx.cisd.openbis.generic.shared.dto.DatastoreServiceDescriptions;
 import ch.systemsx.cisd.openbis.generic.shared.dto.EntityCollectionForCreationOrUpdate;
@@ -507,4 +509,16 @@ public class ETLServiceLogger extends AbstractServerLogger implements IETLLIMSSe
                 withProperties);
         return null;
     }
+
+    public void removeDataSetsPermanently(String sessionToken, List<String> dataSetCodes,
+            String reason)
+    {
+        logAccess(sessionToken, "removeDataSetsPermanently", "DATA_SET_CODES(%s) REASON(%s)",
+                CollectionUtils.abbreviate(dataSetCodes, 5), reason);
+    }
+
+    public void updateDataSet(String sessionToken, DataSetUpdatesDTO dataSetUpdates)
+    {
+        logAccess(sessionToken, "updateDataSet", "DATA_SET_UPDATES(%s)", dataSetUpdates);
+    }
 }
diff --git a/openbis/source/java/ch/systemsx/cisd/openbis/generic/server/business/bo/datasetlister/DatasetLister.java b/openbis/source/java/ch/systemsx/cisd/openbis/generic/server/business/bo/datasetlister/DatasetLister.java
index 738ee3c1334..ba555ae800e 100644
--- a/openbis/source/java/ch/systemsx/cisd/openbis/generic/server/business/bo/datasetlister/DatasetLister.java
+++ b/openbis/source/java/ch/systemsx/cisd/openbis/generic/server/business/bo/datasetlister/DatasetLister.java
@@ -341,8 +341,9 @@ public class DatasetLister extends AbstractLister implements IDatasetLister
     {
         String[] codes = datasetCodes.toArray(new String[datasetCodes.size()]);
         DataIterator<DatasetRecord> datasets = query.getDatasets(codes);
-        loadSmallConnectedTables();
-        return asList(createPrimaryDatasets(asList(datasets)));
+        return enrichDatasets(datasets);
+        // loadSmallConnectedTables();
+        // return asList(createPrimaryDatasets(asList(datasets)));
     }
 
     public List<ExternalData> listByDataStore(long dataStoreID)
diff --git a/openbis/source/java/ch/systemsx/cisd/openbis/generic/shared/IETLLIMSService.java b/openbis/source/java/ch/systemsx/cisd/openbis/generic/shared/IETLLIMSService.java
index 2b5c1530032..21155a6d184 100644
--- a/openbis/source/java/ch/systemsx/cisd/openbis/generic/shared/IETLLIMSService.java
+++ b/openbis/source/java/ch/systemsx/cisd/openbis/generic/shared/IETLLIMSService.java
@@ -32,6 +32,7 @@ import ch.systemsx.cisd.openbis.generic.shared.authorization.predicate.AbstractT
 import ch.systemsx.cisd.openbis.generic.shared.authorization.predicate.AtomicOperationsPredicate;
 import ch.systemsx.cisd.openbis.generic.shared.authorization.predicate.DataSetCodeCollectionPredicate;
 import ch.systemsx.cisd.openbis.generic.shared.authorization.predicate.DataSetCodePredicate;
+import ch.systemsx.cisd.openbis.generic.shared.authorization.predicate.DataSetUpdatesPredicate;
 import ch.systemsx.cisd.openbis.generic.shared.authorization.predicate.ExistingSampleOwnerIdentifierPredicate;
 import ch.systemsx.cisd.openbis.generic.shared.authorization.predicate.ExistingSpaceIdentifierPredicate;
 import ch.systemsx.cisd.openbis.generic.shared.authorization.predicate.ListSampleCriteriaPredicate;
@@ -74,6 +75,7 @@ import ch.systemsx.cisd.openbis.generic.shared.basic.dto.VocabularyTerm;
 import ch.systemsx.cisd.openbis.generic.shared.dto.AtomicEntityOperationDetails;
 import ch.systemsx.cisd.openbis.generic.shared.dto.AtomicEntityOperationResult;
 import ch.systemsx.cisd.openbis.generic.shared.dto.DataSetShareId;
+import ch.systemsx.cisd.openbis.generic.shared.dto.DataSetUpdatesDTO;
 import ch.systemsx.cisd.openbis.generic.shared.dto.DataStoreServerInfo;
 import ch.systemsx.cisd.openbis.generic.shared.dto.EntityCollectionForCreationOrUpdate;
 import ch.systemsx.cisd.openbis.generic.shared.dto.ListSamplesByPropertyCriteria;
@@ -710,4 +712,29 @@ public interface IETLLIMSService extends IServer, ISessionProvider
     @Transactional(readOnly = true)
     @RolesAllowed(RoleWithHierarchy.SPACE_ETL_SERVER)
     public List<ExternalData> searchForDataSets(String sessionToken, SearchCriteria searchCriteria);
+
+    /**
+     * Permanently deletes a list of data sets, bypassing the trash.
+     */
+    @Transactional
+    @RolesAllowed(RoleWithHierarchy.SPACE_ETL_SERVER)
+    @DatabaseUpdateModification(value =
+        { ObjectKind.SAMPLE, ObjectKind.EXPERIMENT })
+    @DatabaseCreateOrDeleteModification(value =
+        { ObjectKind.DATA_SET })
+    public void removeDataSetsPermanently(
+            String sessionToken,
+            @AuthorizationGuard(guardClass = DataSetCodeCollectionPredicate.class) List<String> dataSetCodes,
+            String reason);
+
+    /**
+     * Updates a data set.
+     */
+    @Transactional
+    @RolesAllowed(RoleWithHierarchy.SPACE_ETL_SERVER)
+    @DatabaseUpdateModification(value =
+        { ObjectKind.EXPERIMENT, ObjectKind.SAMPLE, ObjectKind.DATA_SET })
+    public void updateDataSet(
+            String sessionToken,
+            @AuthorizationGuard(guardClass = DataSetUpdatesPredicate.class) DataSetUpdatesDTO dataSetUpdates);
 }
-- 
GitLab