From 04ca0c8bcedef9e60d0fa94ff72a293f19e0b038 Mon Sep 17 00:00:00 2001 From: tpylak <tpylak> Date: Tue, 6 Jul 2010 17:27:39 +0000 Subject: [PATCH] LMS-1607 port screening datasets speed improvement from S83.x SVN: 16902 --- .../openbis/dss/etl/HCSDatasetUploader.java | 98 +++++++++++++------ .../bdsmigration/BDSImagingDbUploader.java | 13 ++- .../imaging/dataaccess/IImagingQueryDAO.java | 34 ++++--- .../dataaccess/ImgChannelStackDTO.java | 6 +- .../imaging/dataaccess/ImgImageDTO.java | 8 +- .../dataaccess/ImagingQueryDAOTest.java | 21 ++-- 6 files changed, 122 insertions(+), 58 deletions(-) diff --git a/screening/source/java/ch/systemsx/cisd/openbis/dss/etl/HCSDatasetUploader.java b/screening/source/java/ch/systemsx/cisd/openbis/dss/etl/HCSDatasetUploader.java index b1fe18a94e1..e6abc225865 100644 --- a/screening/source/java/ch/systemsx/cisd/openbis/dss/etl/HCSDatasetUploader.java +++ b/screening/source/java/ch/systemsx/cisd/openbis/dss/etl/HCSDatasetUploader.java @@ -25,7 +25,6 @@ import java.util.Set; import java.util.Map.Entry; import ch.systemsx.cisd.bds.hcs.Location; -import ch.systemsx.cisd.common.exceptions.EnvironmentFailureException; import ch.systemsx.cisd.openbis.dss.etl.ScreeningContainerDatasetInfoHelper.ExperimentWithChannelsAndContainer; import ch.systemsx.cisd.openbis.dss.generic.shared.IEncapsulatedOpenBISService; import ch.systemsx.cisd.openbis.dss.generic.shared.ServiceProvider; @@ -62,6 +61,7 @@ public class HCSDatasetUploader dao, info, channels); long contId = basicStruct.getContainerId(); Map<String, Long/* (tech id */> channelsMap = basicStruct.getChannelsMap(); + Long[][] spotIds = getOrCreateSpots(contId, info, images); long datasetId = createDataset(contId, info); @@ -105,11 +105,11 @@ public class HCSDatasetUploader { Map<ImgChannelStackDTO, List<AcquiredImageInStack>> stackImagesMap = makeStackImagesMap(images, spotIds, datasetId); - createChannelStacks(stackImagesMap.keySet()); + dao.addChannelStacks(new ArrayList<ImgChannelStackDTO>(stackImagesMap.keySet())); createImages(stackImagesMap, channelsMap); } - private static Map<ImgChannelStackDTO, List<AcquiredImageInStack>> makeStackImagesMap( + private Map<ImgChannelStackDTO, List<AcquiredImageInStack>> makeStackImagesMap( List<AcquiredPlateImage> images, Long[][] spotIds, long datasetId) { Map<ImgChannelStackDTO, List<AcquiredImageInStack>> map = @@ -134,11 +134,12 @@ public class HCSDatasetUploader .getThumbnailFilePathOrNull()); } - private static ImgChannelStackDTO makeStackDTO(AcquiredPlateImage image, Long[][] spotIds, + private ImgChannelStackDTO makeStackDTO(AcquiredPlateImage image, Long[][] spotIds, long datasetId) { long spotId = getSpotId(image, spotIds); - return new ImgChannelStackDTO(image.getTileRow(), image.getTileColumn(), datasetId, spotId); + return new ImgChannelStackDTO(dao.createChannelStackId(), image.getTileRow(), image + .getTileColumn(), datasetId, spotId); } private static long getSpotId(AcquiredPlateImage image, Long[][] spotIds) @@ -153,62 +154,103 @@ public class HCSDatasetUploader private void createImages(Map<ImgChannelStackDTO, List<AcquiredImageInStack>> stackImagesMap, Map<String, Long> channelsMap) { + ImagesToCreate imagesToCreate = + new ImagesToCreate(new ArrayList<ImgImageDTO>(), + new ArrayList<ImgAcquiredImageDTO>()); for (Entry<ImgChannelStackDTO, List<AcquiredImageInStack>> entry : stackImagesMap .entrySet()) { long stackId = entry.getKey().getId(); - createImages(stackId, channelsMap, entry.getValue()); + addImagesToCreate(imagesToCreate, stackId, channelsMap, entry.getValue()); } + dao.addImages(imagesToCreate.getImages()); + dao.addAcquiredImages(imagesToCreate.getAcquiredImages()); } - private void createImages(long stackId, Map<String, Long> channelsMap, - List<AcquiredImageInStack> images) + /** + * Because we can have millions of images, we have to create them in batches. That is why we + * create all the DTOs first and generate ids for them before they are created in the database. + * Then we can save everything in one go. + */ + private void addImagesToCreate(ImagesToCreate imagesToCreate, long stackId, + Map<String, Long> channelsMap, List<AcquiredImageInStack> images) { + List<ImgImageDTO> imageDTOs = imagesToCreate.getImages(); + List<ImgAcquiredImageDTO> acquiredImageDTOs = imagesToCreate.getAcquiredImages(); for (AcquiredImageInStack image : images) { - String channelName = image.getChannelName(); - Long channelTechId = channelsMap.get(channelName); - if (channelTechId == null) + long channelTechId = channelsMap.get(image.getChannelName()); + + ImgImageDTO imageDTO = mkImageWithIdDTO(image.getImageFilePath()); + ImgImageDTO thumbnailDTO = tryMkImageWithIdDTO(image.getThumbnailPathOrNull()); + Long thumbnailId = thumbnailDTO == null ? null : thumbnailDTO.getId(); + ImgAcquiredImageDTO acquiredImage = + mkAcquiredImage(stackId, channelTechId, imageDTO.getId(), thumbnailId); + + imageDTOs.add(imageDTO); + if (thumbnailDTO != null) { - throw new EnvironmentFailureException("Invalid channel name " + channelName - + ". Available channels: " + channelsMap.keySet()); + imageDTOs.add(thumbnailDTO); } - createImage(stackId, channelTechId, image); + acquiredImageDTOs.add(acquiredImage); + } + } + + private static class ImagesToCreate + { + private final List<ImgImageDTO> images; + + private final List<ImgAcquiredImageDTO> acquiredImages; + + public ImagesToCreate(List<ImgImageDTO> images, List<ImgAcquiredImageDTO> acquiredImages) + { + super(); + this.images = images; + this.acquiredImages = acquiredImages; + } + + public List<ImgImageDTO> getImages() + { + return images; + } + + public List<ImgAcquiredImageDTO> getAcquiredImages() + { + return acquiredImages; } } - private void createImage(long stackId, long channelTechId, AcquiredImageInStack image) + private ImgAcquiredImageDTO mkAcquiredImage(long stackId, long channelTechId, long imageId, + Long thumbnailId) { - long imageId = addImage(image.getImageFilePath()); - Long thumbnailId = addImage(image.getThumbnailPathOrNull()); ImgAcquiredImageDTO acquiredImage = new ImgAcquiredImageDTO(); acquiredImage.setImageId(imageId); acquiredImage.setThumbnailId(thumbnailId); acquiredImage.setChannelStackId(stackId); acquiredImage.setChannelId(channelTechId); - dao.addAcquiredImage(acquiredImage); + return acquiredImage; } - private Long addImage(RelativeImageReference imageReferenceOrNull) + private ImgImageDTO tryMkImageWithIdDTO(RelativeImageReference imageReferenceOrNull) { if (imageReferenceOrNull == null) { return null; } - return dao.addImage(new ImgImageDTO(imageReferenceOrNull.getRelativeImagePath(), - imageReferenceOrNull.tryGetPage(), imageReferenceOrNull.tryGetColorComponent())); + return mkImageWithIdDTO(imageReferenceOrNull); } - private void createChannelStacks(Set<ImgChannelStackDTO> stacks) + private ImgImageDTO mkImageWithIdDTO(RelativeImageReference imageReferenceOrNull) { - for (ImgChannelStackDTO stack : stacks) - { - long id = dao.addChannelStack(stack); - stack.setId(id); - } + ImgImageDTO dto = + new ImgImageDTO(dao.createImageId(), imageReferenceOrNull.getRelativeImagePath(), + imageReferenceOrNull.tryGetPage(), imageReferenceOrNull + .tryGetColorComponent()); + return dto; } - // returns a matrix of spot tech ids. The matrix[row][col] contains null is spot at (row,col) + // returns a matrix of spot tech ids. The matrix[row][col] contains null is + // spot at (row,col) // does not exist. Spot coordinates are 0-based in the matrix. private Long[][] getOrCreateSpots(long contId, ScreeningContainerDatasetInfo info, List<AcquiredPlateImage> images) diff --git a/screening/source/java/ch/systemsx/cisd/openbis/dss/etl/bdsmigration/BDSImagingDbUploader.java b/screening/source/java/ch/systemsx/cisd/openbis/dss/etl/bdsmigration/BDSImagingDbUploader.java index 5eeb050f3f6..62002ba1131 100644 --- a/screening/source/java/ch/systemsx/cisd/openbis/dss/etl/bdsmigration/BDSImagingDbUploader.java +++ b/screening/source/java/ch/systemsx/cisd/openbis/dss/etl/bdsmigration/BDSImagingDbUploader.java @@ -59,7 +59,7 @@ class BDSImagingDbUploader private final IImagingQueryDAO dao; - BDSImagingDbUploader(File dataset, IImagingQueryDAO dao, String originalDatasetDirName, + public BDSImagingDbUploader(File dataset, IImagingQueryDAO dao, String originalDatasetDirName, List<String> channelNames, List<ColorComponent> channelColorComponentsOrNull) { this.dao = dao; @@ -70,7 +70,7 @@ class BDSImagingDbUploader } - boolean migrate() + public boolean migrate() { List<AcquiredPlateImage> images = tryExtractMappings(); if (images == null) @@ -154,7 +154,14 @@ class BDSImagingDbUploader private static List<String> readLines(File mappingFile) throws IOException, FileNotFoundException { - return IOUtils.readLines(new FileInputStream(mappingFile)); + FileInputStream stream = new FileInputStream(mappingFile); + try + { + return IOUtils.readLines(stream); + } finally + { + stream.close(); + } } private List<AcquiredPlateImage> tryParseMappings(List<String> lines) diff --git a/screening/source/java/ch/systemsx/cisd/openbis/plugin/screening/shared/imaging/dataaccess/IImagingQueryDAO.java b/screening/source/java/ch/systemsx/cisd/openbis/plugin/screening/shared/imaging/dataaccess/IImagingQueryDAO.java index 701c67e4647..9dbd1655962 100644 --- a/screening/source/java/ch/systemsx/cisd/openbis/plugin/screening/shared/imaging/dataaccess/IImagingQueryDAO.java +++ b/screening/source/java/ch/systemsx/cisd/openbis/plugin/screening/shared/imaging/dataaccess/IImagingQueryDAO.java @@ -93,23 +93,37 @@ public interface IImagingQueryDAO extends TransactionQuery @Select(value = "SELECT * from FEATURE_VALUES where FD_ID = ?{1.id}", resultSetBinding = FeatureVectorDataObjectBinding.class) public List<ImgFeatureValuesDTO> getFeatureValues(ImgFeatureDefDTO featureDef); + // generate ids + + @Select("select nextval('images_id_seq')") + public long createImageId(); + + @Select("select nextval('channel_stacks_id_seq')") + public long createChannelStackId(); + + // batch updates + + @Update(sql = "insert into CHANNEL_STACKS (ID, X, Y, Z_in_M, T_in_SEC, DS_ID, SPOT_ID) values " + + "(?{1.id}, ?{1.column}, ?{1.row}, ?{1.z}, ?{1.t}, ?{1.datasetId}, ?{1.spotId})", batchUpdate = true) + public void addChannelStacks(List<ImgChannelStackDTO> channelStacks); + + @Update(sql = "insert into IMAGES (ID, PATH, PAGE, COLOR) values " + + "(?{1.id}, ?{1.filePath}, ?{1.page}, ?{1.colorComponentAsString})", batchUpdate = true) + public void addImages(List<ImgImageDTO> images); + + @Update(sql = "insert into ACQUIRED_IMAGES (IMG_ID, THUMBNAIL_ID, CHANNEL_STACK_ID, CHANNEL_ID) values " + + "(?{1.imageId}, ?{1.thumbnailId}, ?{1.channelStackId}, ?{1.channelId})", batchUpdate = true) + public void addAcquiredImages(List<ImgAcquiredImageDTO> acquiredImages); + // inserts @Select("insert into EXPERIMENTS (PERM_ID) values (?{1}) returning ID") public long addExperiment(String experimentPermId); - @Select("insert into ACQUIRED_IMAGES (IMG_ID, THUMBNAIL_ID, CHANNEL_STACK_ID, CHANNEL_ID) values " - + "(?{1.imageId}, ?{1.thumbnailId}, ?{1.channelStackId}, ?{1.channelId}) returning ID") - public long addAcquiredImage(ImgAcquiredImageDTO acquiredImage); - @Select("insert into CHANNELS (NAME, DESCRIPTION, WAVELENGTH, DS_ID, EXP_ID) values " + "(?{1.name}, ?{1.description}, ?{1.wavelength}, ?{1.datasetId}, ?{1.experimentId}) returning ID") public long addChannel(ImgChannelDTO channel); - @Select("insert into CHANNEL_STACKS (X, Y, Z_in_M, T_in_SEC, DS_ID, SPOT_ID) values " - + "(?{1.column}, ?{1.row}, ?{1.z}, ?{1.t}, ?{1.datasetId}, ?{1.spotId}) returning ID") - public long addChannelStack(ImgChannelStackDTO channelStack); - @Select("insert into CONTAINERS (PERM_ID, SPOTS_WIDTH, SPOTS_HEIGHT, EXPE_ID) values " + "(?{1.permId}, ?{1.numberOfColumns}, ?{1.numberOfRows}, ?{1.experimentId}) returning ID") public long addContainer(ImgContainerDTO container); @@ -119,10 +133,6 @@ public interface IImagingQueryDAO extends TransactionQuery + "?{1.fieldNumberOfRows}, ?{1.containerId}) returning ID") public long addDataset(ImgDatasetDTO dataset); - @Select("insert into IMAGES (PATH, PAGE, COLOR) values " - + "(?{1.filePath}, ?{1.page}, ?{1.colorComponentAsString}) returning ID") - public long addImage(ImgImageDTO image); - @Select("insert into SPOTS (X, Y, CONT_ID, PERM_ID) values " + "(?{1.column}, ?{1.row}, ?{1.containerId}, ?{1.permId}) returning ID") public long addSpot(ImgSpotDTO spot); diff --git a/screening/source/java/ch/systemsx/cisd/openbis/plugin/screening/shared/imaging/dataaccess/ImgChannelStackDTO.java b/screening/source/java/ch/systemsx/cisd/openbis/plugin/screening/shared/imaging/dataaccess/ImgChannelStackDTO.java index 2fd24910d0b..8d616e35a6d 100644 --- a/screening/source/java/ch/systemsx/cisd/openbis/plugin/screening/shared/imaging/dataaccess/ImgChannelStackDTO.java +++ b/screening/source/java/ch/systemsx/cisd/openbis/plugin/screening/shared/imaging/dataaccess/ImgChannelStackDTO.java @@ -16,7 +16,6 @@ package ch.systemsx.cisd.openbis.plugin.screening.shared.imaging.dataaccess; -import net.lemnik.eodsql.AutoGeneratedKeys; import net.lemnik.eodsql.ResultColumn; import ch.systemsx.cisd.common.utilities.AbstractHashable; @@ -26,7 +25,7 @@ import ch.systemsx.cisd.common.utilities.AbstractHashable; */ public class ImgChannelStackDTO extends AbstractHashable { - @AutoGeneratedKeys + @ResultColumn("ID") private long id; // x and y are kind of a two dimensional sequence number, (e.g. tile column) @@ -58,8 +57,9 @@ public class ImgChannelStackDTO extends AbstractHashable // All Data-Object classes must have a default constructor. } - public ImgChannelStackDTO(int row, int column, long datasetId, long spotId) + public ImgChannelStackDTO(long id, int row, int column, long datasetId, long spotId) { + this.id = id; this.row = row; this.column = column; this.datasetId = datasetId; diff --git a/screening/source/java/ch/systemsx/cisd/openbis/plugin/screening/shared/imaging/dataaccess/ImgImageDTO.java b/screening/source/java/ch/systemsx/cisd/openbis/plugin/screening/shared/imaging/dataaccess/ImgImageDTO.java index ebdeabae3cb..3c0d33c38e1 100644 --- a/screening/source/java/ch/systemsx/cisd/openbis/plugin/screening/shared/imaging/dataaccess/ImgImageDTO.java +++ b/screening/source/java/ch/systemsx/cisd/openbis/plugin/screening/shared/imaging/dataaccess/ImgImageDTO.java @@ -16,8 +16,6 @@ package ch.systemsx.cisd.openbis.plugin.screening.shared.imaging.dataaccess; - -import net.lemnik.eodsql.AutoGeneratedKeys; import net.lemnik.eodsql.ResultColumn; import ch.systemsx.cisd.common.utilities.AbstractHashable; @@ -27,7 +25,7 @@ import ch.systemsx.cisd.common.utilities.AbstractHashable; */ public class ImgImageDTO extends AbstractHashable { - @AutoGeneratedKeys + @ResultColumn("id") private long id; @ResultColumn("PATH") @@ -45,8 +43,10 @@ public class ImgImageDTO extends AbstractHashable // All Data-Object classes must have a default constructor. } - public ImgImageDTO(String filePath, Integer pageOrNull, ColorComponent colorComponentOrNull) + public ImgImageDTO(long id, String filePath, Integer pageOrNull, + ColorComponent colorComponentOrNull) { + this.id = id; this.filePath = filePath; this.pageOrNull = pageOrNull; this.colorComponentOrNull = diff --git a/screening/sourceTest/java/ch/systemsx/cisd/openbis/plugin/screening/shared/imaging/dataaccess/ImagingQueryDAOTest.java b/screening/sourceTest/java/ch/systemsx/cisd/openbis/plugin/screening/shared/imaging/dataaccess/ImagingQueryDAOTest.java index 2bd4945d1a1..054d6c65b91 100644 --- a/screening/sourceTest/java/ch/systemsx/cisd/openbis/plugin/screening/shared/imaging/dataaccess/ImagingQueryDAOTest.java +++ b/screening/sourceTest/java/ch/systemsx/cisd/openbis/plugin/screening/shared/imaging/dataaccess/ImagingQueryDAOTest.java @@ -20,6 +20,7 @@ import static org.testng.AssertJUnit.assertEquals; import static org.testng.AssertJUnit.assertNotNull; import java.sql.SQLException; +import java.util.Arrays; import java.util.List; import org.testng.AssertJUnit; @@ -148,8 +149,8 @@ public class ImagingQueryDAOTest extends AbstractDBTest || channels[1] == channelId1 && channels[0] == channelId2); // test get id of first channel - assertEquals(channels[0], dao.tryGetChannelIdByChannelNameDatasetIdOrExperimentId(datasetId, - experimentId, "dsChannel").intValue()); + assertEquals(channels[0], dao.tryGetChannelIdByChannelNameDatasetIdOrExperimentId( + datasetId, experimentId, "dsChannel").intValue()); List<ImgChannelDTO> experimentChannels = dao.getChannelsByExperimentId(experimentId); assertEquals(1, experimentChannels.size()); @@ -164,8 +165,9 @@ public class ImagingQueryDAOTest extends AbstractDBTest private long addImage(String path, ColorComponent colorComponent) { - final ImgImageDTO image = new ImgImageDTO(path, PAGE, colorComponent); - return dao.addImage(image); + final ImgImageDTO image = new ImgImageDTO(dao.createImageId(), path, PAGE, colorComponent); + dao.addImages(Arrays.asList(image)); + return image.getId(); } private long addExperiment() @@ -243,17 +245,20 @@ public class ImagingQueryDAOTest extends AbstractDBTest private long addChannelStack(long datasetId, long spotId) { final ImgChannelStackDTO channelStack = - new ImgChannelStackDTO(Y_TILE_ROW, X_TILE_COLUMN, datasetId, spotId); - return dao.addChannelStack(channelStack); + new ImgChannelStackDTO(dao.createChannelStackId(), Y_TILE_ROW, X_TILE_COLUMN, + datasetId, spotId); + dao.addChannelStacks(Arrays.asList(channelStack)); + return channelStack.getId(); } - private long addAcquiredImage(long imageId, long channelStackId, long channelId) + private void addAcquiredImage(long imageId, long channelStackId, long channelId) { final ImgAcquiredImageDTO acquiredImage = new ImgAcquiredImageDTO(); acquiredImage.setImageId(imageId); acquiredImage.setChannelStackId(channelStackId); acquiredImage.setChannelId(channelId); - return dao.addAcquiredImage(acquiredImage); + + dao.addAcquiredImages(Arrays.asList(acquiredImage)); } } -- GitLab