diff --git a/datastore_server/source/java/ch/systemsx/cisd/etlserver/path/DatabaseBasedDataSetPathsInfoFeeder.java b/datastore_server/source/java/ch/systemsx/cisd/etlserver/path/DatabaseBasedDataSetPathsInfoFeeder.java index c0aa179e5aaba9f8cbc9b844fab58e70efbb27e8..1e78bdedac55fb48e0c91415da9165b0451f40c7 100644 --- a/datastore_server/source/java/ch/systemsx/cisd/etlserver/path/DatabaseBasedDataSetPathsInfoFeeder.java +++ b/datastore_server/source/java/ch/systemsx/cisd/etlserver/path/DatabaseBasedDataSetPathsInfoFeeder.java @@ -17,10 +17,14 @@ package ch.systemsx.cisd.etlserver.path; import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; import java.util.ArrayList; import java.util.List; +import ch.systemsx.cisd.base.exceptions.CheckedExceptionTunnel; import ch.systemsx.cisd.common.action.IDelegatedAction; +import ch.systemsx.cisd.common.filesystem.FileUtilities; import ch.systemsx.cisd.etlserver.IDataSetPathsInfoFeeder; import ch.systemsx.cisd.openbis.common.io.hierarchical_content.IHierarchicalContentFactory; import ch.systemsx.cisd.openbis.common.io.hierarchical_content.api.IHierarchicalContent; @@ -74,9 +78,21 @@ public class DatabaseBasedDataSetPathsInfoFeeder implements IDataSetPathsInfoFee String relativePath = (parentId == null) ? "" : pathPrefix + fileName; if (directory) { + File file = pathInfo.getFile(); + if (file != null && FileUtilities.isHDF5ContainerFile(file)) + { + try + { + PathInfo.setChecksum(pathInfo, new FileInputStream(file), computeChecksum, checksumType); + } catch (FileNotFoundException ex) + { + throw CheckedExceptionTunnel.wrapIfNecessary(ex); + } + } final long directoryId = dao.createDataSetFile(dataSetId, parentId, relativePath, fileName, - pathInfo.getSizeInBytes(), directory, null, null, pathInfo.getLastModifiedDate()); + pathInfo.getSizeInBytes(), directory, pathInfo.getChecksumCRC32(), + pathInfo.getChecksum(), pathInfo.getLastModifiedDate()); if (relativePath.length() > 0) { relativePath += '/'; diff --git a/datastore_server/source/java/ch/systemsx/cisd/etlserver/path/PathInfo.java b/datastore_server/source/java/ch/systemsx/cisd/etlserver/path/PathInfo.java index 2778c76bc85edfbb363cfd62053b6434c3888b88..fe2ad2f960c55e4c7c2312468985c9a7fbee5175 100644 --- a/datastore_server/source/java/ch/systemsx/cisd/etlserver/path/PathInfo.java +++ b/datastore_server/source/java/ch/systemsx/cisd/etlserver/path/PathInfo.java @@ -16,6 +16,7 @@ package ch.systemsx.cisd.etlserver.path; +import java.io.File; import java.io.IOException; import java.io.InputStream; import java.security.MessageDigest; @@ -30,6 +31,7 @@ import java.util.zip.CRC32; import org.apache.commons.io.IOUtils; import ch.systemsx.cisd.base.exceptions.CheckedExceptionTunnel; +import ch.systemsx.cisd.common.io.IOUtilities; import ch.systemsx.cisd.openbis.common.io.hierarchical_content.api.IHierarchicalContentNode; /** @@ -51,6 +53,7 @@ final class PathInfo pathInfo.fileName = node.getName(); pathInfo.lastModifiedDate = new Date(node.getLastModified()); pathInfo.directory = node.isDirectory(); + pathInfo.file = node.tryGetFile(); if (pathInfo.directory) { pathInfo.children = createPathInfos(node, computeChecksum, checksumType); @@ -81,14 +84,29 @@ final class PathInfo pathInfo.checksumCRC32 = node.getChecksumCRC32(); return; } - MessageDigest messageDigest = getMessageDigest(checksumType); - CRC32 crc = new CRC32(); - feedChecksumCalculators(node, messageDigest, crc); - pathInfo.checksumCRC32 = (int) crc.getValue(); - pathInfo.checksum = renderChecksum(checksumType, messageDigest); + setChecksum(pathInfo, node.getInputStream(), computeChecksum, checksumType); } + + static void setChecksum(PathInfo pathInfo, InputStream inputStream, boolean computeChecksum, String checksumType) + { + if (computeChecksum) + { + if (checksumType == null) + { + pathInfo.checksumCRC32 = IOUtilities.getChecksumCRC32(inputStream); + } else + { + MessageDigest messageDigest = PathInfo.getMessageDigest(checksumType); + CRC32 crc = new CRC32(); + PathInfo.feedChecksumCalculators(inputStream, messageDigest, crc); + pathInfo.checksumCRC32 = (int) crc.getValue(); + pathInfo.checksum = renderChecksum(checksumType, messageDigest); + } + } - static String renderChecksum(String checksumType, MessageDigest messageDigest) + } + + private static String renderChecksum(String checksumType, MessageDigest messageDigest) { StringBuilder builder = new StringBuilder(checksumType).append(':'); for (byte b : messageDigest.digest()) @@ -99,12 +117,10 @@ final class PathInfo return builder.toString(); } - static void feedChecksumCalculators(IHierarchicalContentNode node, MessageDigest messageDigest, CRC32 crc) + private static void feedChecksumCalculators(InputStream inputStream, MessageDigest messageDigest, CRC32 crc) { - InputStream inputStream = null; try { - inputStream = node.getInputStream(); byte[] buffer = new byte[4096]; int n = 0; while ((n = inputStream.read(buffer)) > 0) @@ -155,6 +171,8 @@ final class PathInfo }); return childInfos; } + + private File file; private String fileName; @@ -172,6 +190,11 @@ final class PathInfo private Date lastModifiedDate; + public File getFile() + { + return file; + } + public String getFileName() { return fileName; diff --git a/datastore_server/source/java/ch/systemsx/cisd/etlserver/path/PathInfoDatabaseChecksumCalculationTask.java b/datastore_server/source/java/ch/systemsx/cisd/etlserver/path/PathInfoDatabaseChecksumCalculationTask.java index 047af10efdc90be7e7cccbc809268bb645f3dfda..b4752472af3d0539eff26584b327eaea1ac7fe72 100644 --- a/datastore_server/source/java/ch/systemsx/cisd/etlserver/path/PathInfoDatabaseChecksumCalculationTask.java +++ b/datastore_server/source/java/ch/systemsx/cisd/etlserver/path/PathInfoDatabaseChecksumCalculationTask.java @@ -16,6 +16,7 @@ package ch.systemsx.cisd.etlserver.path; +import java.io.InputStream; import java.security.MessageDigest; import java.util.ArrayList; import java.util.List; @@ -103,20 +104,10 @@ public class PathInfoDatabaseChecksumCalculationTask implements IMaintenanceTask for (PathEntryDTO pathEntry : pathEntries) { IHierarchicalContentNode node = content.getNode(pathEntry.getRelativePath()); - String checksum = null; - int checksumCRC32; - if (checksumType == null) - { - checksumCRC32 = IOUtilities.getChecksumCRC32(node.getInputStream()); - } else - { - MessageDigest messageDigest = PathInfo.getMessageDigest(checksumType); - CRC32 crc = new CRC32(); - PathInfo.feedChecksumCalculators(node, messageDigest, crc); - checksumCRC32 = (int) crc.getValue(); - checksum = PathInfo.renderChecksum(checksumType, messageDigest); - } - dao.updateChecksum(pathEntry.getId(), checksumCRC32, checksum); + InputStream inputStream = node.getInputStream(); + PathInfo pathInfo = new PathInfo(); + PathInfo.setChecksum(pathInfo, inputStream, true, checksumType); + dao.updateChecksum(pathEntry.getId(), pathInfo.getChecksumCRC32(), pathInfo.getChecksum()); fileCounter++; } dao.commit(); diff --git a/datastore_server/sourceTest/java/ch/systemsx/cisd/etlserver/path/PathInfoDatabaseFeedingTaskTest.java b/datastore_server/sourceTest/java/ch/systemsx/cisd/etlserver/path/PathInfoDatabaseFeedingTaskTest.java index 75c5f123cbd41143ba690c957b6d2718e6d4f528..55a42c5035bd73f24986afb2a0ef229eb0dba271 100644 --- a/datastore_server/sourceTest/java/ch/systemsx/cisd/etlserver/path/PathInfoDatabaseFeedingTaskTest.java +++ b/datastore_server/sourceTest/java/ch/systemsx/cisd/etlserver/path/PathInfoDatabaseFeedingTaskTest.java @@ -175,7 +175,7 @@ public class PathInfoDatabaseFeedingTaskTest extends AbstractFileSystemTestCase } @Test - public void testH5FolderEnabled() + public void testH5FolderEnabledWithSHA1() { final SimpleDataSetInformationDTO ds1 = new SimpleDataSetInformationDTO(); ds1.setDataSetCode("ds1"); @@ -193,40 +193,40 @@ public class PathInfoDatabaseFeedingTaskTest extends AbstractFileSystemTestCase }); MockPathsInfoDAO pathsInfoDAO = new MockPathsInfoDAO(); - createTask(pathsInfoDAO, 12, 3, 0).execute(); + createTask(pathsInfoDAO, "SHA1", 12, 3, 0).execute(); assertEquals("createDataSet(code=ds1, location=1)\n" + "createDataSetFile(0, parent=null, 1 (, 1007978, d))\n" + "createDataSetFile(0, parent=1, test-data (test-data, 1007968, d))\n" - + "createDataSetFile(0, parent=2, thumbnails2.h5 (test-data/thumbnails2.h5, 490761, d))\n" + + "createDataSetFile(0, parent=2, thumbnails2.h5 (test-data/thumbnails2.h5, 490761, d, checksumCRC32=9fb9b84a, checksum=SHA1:cfb5c11ae566a094c3d950ac0fd89057e3eecf56))\n" + "createDataSetFiles:\n" - + " 0, parent=1, info.txt (info.txt, 10, f, checksumCRC32=176bdc9d)\n" - + " 0, parent=2, farray.h5 (test-data/farray.h5, 8640, f, checksumCRC32=47dedeef)\n" - + " 0, parent=2, thumbnails.h5ar (test-data/thumbnails.h5ar, 508567, f, checksumCRC32=9fb9b84a)\n" - + " 0, parent=3, wA1_d1-1_cCy3.png (test-data/thumbnails2.h5/wA1_d1-1_cCy3.png, 24242, f, checksumCRC32=3361fd20)\n" - + " 0, parent=3, wA1_d1-1_cDAPI.png (test-data/thumbnails2.h5/wA1_d1-1_cDAPI.png, 29353, f, checksumCRC32=609f3183)\n" - + " 0, parent=3, wA1_d1-1_cGFP.png (test-data/thumbnails2.h5/wA1_d1-1_cGFP.png, 27211, f, checksumCRC32=b68f97cf)\n" - + " 0, parent=3, wA1_d1-2_cCy3.png (test-data/thumbnails2.h5/wA1_d1-2_cCy3.png, 28279, f, checksumCRC32=e2c7c34f)\n" - + " 0, parent=3, wA1_d1-2_cDAPI.png (test-data/thumbnails2.h5/wA1_d1-2_cDAPI.png, 22246, f, checksumCRC32=1bf73b61)\n" - + " 0, parent=3, wA1_d1-2_cGFP.png (test-data/thumbnails2.h5/wA1_d1-2_cGFP.png, 22227, f, checksumCRC32=58e14da9)\n" - + " 0, parent=3, wA1_d2-1_cCy3.png (test-data/thumbnails2.h5/wA1_d2-1_cCy3.png, 31570, f, checksumCRC32=b312b087)\n" - + " 0, parent=3, wA1_d2-1_cDAPI.png (test-data/thumbnails2.h5/wA1_d2-1_cDAPI.png, 28267, f, checksumCRC32=e7082b23)\n" - + " 0, parent=3, wA1_d2-1_cGFP.png (test-data/thumbnails2.h5/wA1_d2-1_cGFP.png, 26972, f, checksumCRC32=fb7f320e)\n" - + " 0, parent=3, wA1_d2-2_cCy3.png (test-data/thumbnails2.h5/wA1_d2-2_cCy3.png, 34420, f, checksumCRC32=d367dd9d)\n" - + " 0, parent=3, wA1_d2-2_cDAPI.png (test-data/thumbnails2.h5/wA1_d2-2_cDAPI.png, 28070, f, checksumCRC32=15e1f3b0)\n" - + " 0, parent=3, wA1_d2-2_cGFP.png (test-data/thumbnails2.h5/wA1_d2-2_cGFP.png, 27185, f, checksumCRC32=34bcde32)\n" - + " 0, parent=3, wA1_d3-1_cCy3.png (test-data/thumbnails2.h5/wA1_d3-1_cCy3.png, 28916, f, checksumCRC32=a97cff4e)\n" - + " 0, parent=3, wA1_d3-1_cDAPI.png (test-data/thumbnails2.h5/wA1_d3-1_cDAPI.png, 30079, f, checksumCRC32=6f0abf6f)\n" - + " 0, parent=3, wA1_d3-1_cGFP.png (test-data/thumbnails2.h5/wA1_d3-1_cGFP.png, 28072, f, checksumCRC32=5ba6ae39)\n" - + " 0, parent=3, wA1_d3-2_cCy3.png (test-data/thumbnails2.h5/wA1_d3-2_cCy3.png, 26367, f, checksumCRC32=f8d4cfc7)\n" - + " 0, parent=3, wA1_d3-2_cDAPI.png (test-data/thumbnails2.h5/wA1_d3-2_cDAPI.png, 25086, f, checksumCRC32=aeb12b1a)\n" - + " 0, parent=3, wA1_d3-2_cGFP.png (test-data/thumbnails2.h5/wA1_d3-2_cGFP.png, 22199, f, checksumCRC32=ced4332a)\n" + + " 0, parent=1, info.txt (info.txt, 10, f, checksumCRC32=176bdc9d, checksum=SHA1:a5105d3fcba551031e7abdb25f9bbdb2ad3a9ffa)\n" + + " 0, parent=2, farray.h5 (test-data/farray.h5, 8640, f, checksumCRC32=47dedeef, checksum=SHA1:8f463b0c828b993efd441f602a0907d1bccb0234)\n" + + " 0, parent=2, thumbnails.h5ar (test-data/thumbnails.h5ar, 508567, f, checksumCRC32=9fb9b84a, checksum=SHA1:cfb5c11ae566a094c3d950ac0fd89057e3eecf56)\n" + + " 0, parent=3, wA1_d1-1_cCy3.png (test-data/thumbnails2.h5/wA1_d1-1_cCy3.png, 24242, f, checksumCRC32=3361fd20, checksum=SHA1:fc6eb645dc3a2934442358b4198042b1e2c8a3d4)\n" + + " 0, parent=3, wA1_d1-1_cDAPI.png (test-data/thumbnails2.h5/wA1_d1-1_cDAPI.png, 29353, f, checksumCRC32=609f3183, checksum=SHA1:0707a40ffc2620ef0dc132fce642068e0fa8db9f)\n" + + " 0, parent=3, wA1_d1-1_cGFP.png (test-data/thumbnails2.h5/wA1_d1-1_cGFP.png, 27211, f, checksumCRC32=b68f97cf, checksum=SHA1:1b63ea85ec0b020605dfdb9e52c66b81da628598)\n" + + " 0, parent=3, wA1_d1-2_cCy3.png (test-data/thumbnails2.h5/wA1_d1-2_cCy3.png, 28279, f, checksumCRC32=e2c7c34f, checksum=SHA1:b9618a2404e2f80e72334c26dabed1b54d57797e)\n" + + " 0, parent=3, wA1_d1-2_cDAPI.png (test-data/thumbnails2.h5/wA1_d1-2_cDAPI.png, 22246, f, checksumCRC32=1bf73b61, checksum=SHA1:dcd254b6a549e4a435a15e6d5e6b132882bb74f9)\n" + + " 0, parent=3, wA1_d1-2_cGFP.png (test-data/thumbnails2.h5/wA1_d1-2_cGFP.png, 22227, f, checksumCRC32=58e14da9, checksum=SHA1:68ac59f992acbd413952063ea2a43ec3362f78df)\n" + + " 0, parent=3, wA1_d2-1_cCy3.png (test-data/thumbnails2.h5/wA1_d2-1_cCy3.png, 31570, f, checksumCRC32=b312b087, checksum=SHA1:f3d3f624cb4e931e712f1a429dbb8e58782429e3)\n" + + " 0, parent=3, wA1_d2-1_cDAPI.png (test-data/thumbnails2.h5/wA1_d2-1_cDAPI.png, 28267, f, checksumCRC32=e7082b23, checksum=SHA1:8e2bbc9ca305cdb8c5e22deb57d4aed95d17ede7)\n" + + " 0, parent=3, wA1_d2-1_cGFP.png (test-data/thumbnails2.h5/wA1_d2-1_cGFP.png, 26972, f, checksumCRC32=fb7f320e, checksum=SHA1:f07f5c3886bc13bfc4a57376143aa48ce5896289)\n" + + " 0, parent=3, wA1_d2-2_cCy3.png (test-data/thumbnails2.h5/wA1_d2-2_cCy3.png, 34420, f, checksumCRC32=d367dd9d, checksum=SHA1:9882f8963d344337ccab9e0dd961c723def7ab6f)\n" + + " 0, parent=3, wA1_d2-2_cDAPI.png (test-data/thumbnails2.h5/wA1_d2-2_cDAPI.png, 28070, f, checksumCRC32=15e1f3b0, checksum=SHA1:05c8a28ddcb89738649440783696ea99343e08a4)\n" + + " 0, parent=3, wA1_d2-2_cGFP.png (test-data/thumbnails2.h5/wA1_d2-2_cGFP.png, 27185, f, checksumCRC32=34bcde32, checksum=SHA1:91653d6e94a15414951c1b4d5de6753d97832b14)\n" + + " 0, parent=3, wA1_d3-1_cCy3.png (test-data/thumbnails2.h5/wA1_d3-1_cCy3.png, 28916, f, checksumCRC32=a97cff4e, checksum=SHA1:b704d9d19ea811715185622db88496513ca0215f)\n" + + " 0, parent=3, wA1_d3-1_cDAPI.png (test-data/thumbnails2.h5/wA1_d3-1_cDAPI.png, 30079, f, checksumCRC32=6f0abf6f, checksum=SHA1:8200d0ade2f8062031ce9805a37da5623995932b)\n" + + " 0, parent=3, wA1_d3-1_cGFP.png (test-data/thumbnails2.h5/wA1_d3-1_cGFP.png, 28072, f, checksumCRC32=5ba6ae39, checksum=SHA1:7892abc5001e61e2e2133be11b7fd79a611c558d)\n" + + " 0, parent=3, wA1_d3-2_cCy3.png (test-data/thumbnails2.h5/wA1_d3-2_cCy3.png, 26367, f, checksumCRC32=f8d4cfc7, checksum=SHA1:448f695916bd8275fd35082b6bd84997b420d190)\n" + + " 0, parent=3, wA1_d3-2_cDAPI.png (test-data/thumbnails2.h5/wA1_d3-2_cDAPI.png, 25086, f, checksumCRC32=aeb12b1a, checksum=SHA1:07faef2f0d836b848b60d1459f73cac0ec08a555)\n" + + " 0, parent=3, wA1_d3-2_cGFP.png (test-data/thumbnails2.h5/wA1_d3-2_cGFP.png, 22199, f, checksumCRC32=ced4332a, checksum=SHA1:e16f03d67e86d952cfec4a9a39065e7ec78beb99)\n" + "commit()\n" + "deleteLastFeedingEvent()\n" + "createLastFeedingEvent(Thu Jan 01 01:01:18 CET 1970)\n" + "commit()\n", pathsInfoDAO.getLog()); } - + @Test public void testH5arFolderEnabled() { @@ -251,7 +251,7 @@ public class PathInfoDatabaseFeedingTaskTest extends AbstractFileSystemTestCase assertEquals("createDataSet(code=ds1, location=1)\n" + "createDataSetFile(0, parent=null, 1 (, 1007978, d))\n" + "createDataSetFile(0, parent=1, test-data (test-data, 1007968, d))\n" - + "createDataSetFile(0, parent=2, thumbnails.h5ar (test-data/thumbnails.h5ar, 490761, d))\n" + + "createDataSetFile(0, parent=2, thumbnails.h5ar (test-data/thumbnails.h5ar, 490761, d, checksumCRC32=9fb9b84a))\n" + "createDataSetFiles:\n" + " 0, parent=1, info.txt (info.txt, 10, f, checksumCRC32=176bdc9d)\n" + " 0, parent=2, farray.h5 (test-data/farray.h5, 8640, f, checksumCRC32=47dedeef)\n"