diff --git a/datastore_server/source/java/ch/systemsx/cisd/etlserver/TopLevelDataSetRegistratorGlobalState.java b/datastore_server/source/java/ch/systemsx/cisd/etlserver/TopLevelDataSetRegistratorGlobalState.java index 0144a00d43749adbf965fb287bdca4caa611279b..b82016561e7579c305bfae41474e03c1cab5eb84 100644 --- a/datastore_server/source/java/ch/systemsx/cisd/etlserver/TopLevelDataSetRegistratorGlobalState.java +++ b/datastore_server/source/java/ch/systemsx/cisd/etlserver/TopLevelDataSetRegistratorGlobalState.java @@ -155,6 +155,7 @@ public class TopLevelDataSetRegistratorGlobalState this.storageRecoveryManager.setDropboxRecoveryStateDir(dropboxRecoveryStateDir); this.storageRecoveryManager.setRecoveryMarkerFilesDir(recoveryMarkerFilesDirectory); this.storageRecoveryManager.setMaximumRertyCount(getMaximumRecoveryCount(threadParameters.getThreadProperties())); + this.storageRecoveryManager.setRetryPeriodInSeconds(getMinimumRecoveryPeriod(threadParameters.getThreadProperties())); // Initialize the DSS Registration Log Directory new DssRegistrationLogDirectoryHelper(dssRegistrationLogDir).initializeSubdirectories(); @@ -309,6 +310,8 @@ public class TopLevelDataSetRegistratorGlobalState public static final String RECOVERY_MAX_RETRY_COUNT = "recovery-max-retry-count"; + public static final String RECOVERY_MIN_RETRY_PERIOD = "recovery-min-retry-period"; + private static File getStagingDir(File storeRoot, String shareId, Properties threadProperties) { return getShareLocalDir(storeRoot, shareId, threadProperties, STAGING_DIR, "staging"); @@ -337,6 +340,10 @@ public class TopLevelDataSetRegistratorGlobalState return PropertyUtils.getInt(threadProperties, RECOVERY_MAX_RETRY_COUNT, 50); } + private static int getMinimumRecoveryPeriod(Properties threadProperties) + { + return PropertyUtils.getInt(threadProperties, RECOVERY_MIN_RETRY_PERIOD, 60); + } /** * Get a directory local to the share, respecting the user override, if one is specified, and * defaulting to the defaultDirName. diff --git a/datastore_server/source/java/ch/systemsx/cisd/etlserver/registrator/AbstractOmniscientTopLevelDataSetRegistrator.java b/datastore_server/source/java/ch/systemsx/cisd/etlserver/registrator/AbstractOmniscientTopLevelDataSetRegistrator.java index 1d72469af08095982f53b73e955375742cf09ac5..3043c7526f8fa4c18a15f4fda38575de84b6d5f0 100644 --- a/datastore_server/source/java/ch/systemsx/cisd/etlserver/registrator/AbstractOmniscientTopLevelDataSetRegistrator.java +++ b/datastore_server/source/java/ch/systemsx/cisd/etlserver/registrator/AbstractOmniscientTopLevelDataSetRegistrator.java @@ -21,6 +21,8 @@ import static ch.systemsx.cisd.etlserver.ThreadParameters.ON_ERROR_DECISION_KEY; import java.io.File; import java.util.ArrayList; +import java.util.Calendar; +import java.util.Date; import java.util.List; import java.util.Properties; import java.util.concurrent.locks.Lock; @@ -494,6 +496,12 @@ public abstract class AbstractOmniscientTopLevelDataSetRegistrator<T extends Dat throw new IllegalStateException("Recovery file " + recoveryFile + " doesn't exist"); } + if (false == retryPeriodHasPassed(recoveryInfo)) + { + operationLog.info("Found recovery inforation for "+incomingFileOriginal+". The recovery won't happen as the retry period has not yet passed" ); + return; + } + operationLog.info("will recover from broken registration. Found marker file " + recoveryMarkerFile + " and " + recoveryFile); @@ -534,6 +542,7 @@ public abstract class AbstractOmniscientTopLevelDataSetRegistrator<T extends Dat // corrupt the recoveryMarkerFile?) recoveryInfo.increaseTryCount(); + recoveryInfo.setLastTry(new Date()); recoveryInfo.writeToFile(recoveryMarkerFile); } } @@ -561,12 +570,23 @@ public abstract class AbstractOmniscientTopLevelDataSetRegistrator<T extends Dat handleRecoveryState(recoveryState, cleanupAction, recoveryMarkerFileCleanupAction); } + /** + * Check wheter the last retry + retry period < date.now + */ + private boolean retryPeriodHasPassed(final DataSetStorageRecoveryInfo recoveryInfo) + { + Calendar c = Calendar.getInstance(); + c.setTime(recoveryInfo.getLastTry()); + c.add(Calendar.SECOND, state.getGlobalState().getStorageRecoveryManager() + .getRetryPeriodInSeconds()); + return c.getTime().before(new Date()); + } + private void handleRecoveryState(DataSetStoragePrecommitRecoveryState<T> recoveryState, final IDelegatedActionWithResult<Boolean> cleanAfterwardsAction, final IDelegatedActionWithResult<Boolean> recoveryMarkerCleanup) { // TODO: Jobs left to do here: - // rollback // jython hooks DssRegistrationLogger logger = recoveryState.getRegistrationLogger(state); diff --git a/datastore_server/source/java/ch/systemsx/cisd/etlserver/registrator/DataSetStorageRecoveryInfo.java b/datastore_server/source/java/ch/systemsx/cisd/etlserver/registrator/DataSetStorageRecoveryInfo.java index 537f57fe6cbdea12f5cd4c6e1e37e57c24023674..9c5f4795f71e0598e09cc716a141353e1231ed3d 100644 --- a/datastore_server/source/java/ch/systemsx/cisd/etlserver/registrator/DataSetStorageRecoveryInfo.java +++ b/datastore_server/source/java/ch/systemsx/cisd/etlserver/registrator/DataSetStorageRecoveryInfo.java @@ -36,7 +36,7 @@ public class DataSetStorageRecoveryInfo implements Serializable private final File recoveryStateFile; - private final Date lastTry; + private Date lastTry; private int tryCount; @@ -53,6 +53,11 @@ public class DataSetStorageRecoveryInfo implements Serializable tryCount++; } + public void setLastTry(Date lastTry) + { + this.lastTry = lastTry; + } + public File getRecoveryStateFile() { return recoveryStateFile; diff --git a/datastore_server/source/java/ch/systemsx/cisd/etlserver/registrator/DataSetStorageRecoveryManager.java b/datastore_server/source/java/ch/systemsx/cisd/etlserver/registrator/DataSetStorageRecoveryManager.java index 9ab0479c1f45a537cb8ccfe91e7baa0cc95c5fda..cc9e9f2c1b096cb9989649097402264a72bb434c 100644 --- a/datastore_server/source/java/ch/systemsx/cisd/etlserver/registrator/DataSetStorageRecoveryManager.java +++ b/datastore_server/source/java/ch/systemsx/cisd/etlserver/registrator/DataSetStorageRecoveryManager.java @@ -47,6 +47,8 @@ public class DataSetStorageRecoveryManager implements IDataSetStorageRecoveryMan private int maxRetryCount = 50; + private int retryPeriodInSeconds = 60; + public <T extends DataSetInformation> void checkpointPrecommittedState( DataSetStorageAlgorithmRunner<T> runner) { @@ -161,4 +163,16 @@ public class DataSetStorageRecoveryManager implements IDataSetStorageRecoveryMan { return this.maxRetryCount; } + + public int getRetryPeriodInSeconds() + { + return retryPeriodInSeconds; + } + + public void setRetryPeriodInSeconds(int retryPeriodInSeconds) + { + this.retryPeriodInSeconds = retryPeriodInSeconds; + + } + } diff --git a/datastore_server/source/java/ch/systemsx/cisd/etlserver/registrator/IDataSetStorageRecoveryManager.java b/datastore_server/source/java/ch/systemsx/cisd/etlserver/registrator/IDataSetStorageRecoveryManager.java index 238d5d4246d459db68ac019afcaba3cbf130f8b6..fd7ddc86913f3ab02a74ec9ad75939c75a64cbf0 100644 --- a/datastore_server/source/java/ch/systemsx/cisd/etlserver/registrator/IDataSetStorageRecoveryManager.java +++ b/datastore_server/source/java/ch/systemsx/cisd/etlserver/registrator/IDataSetStorageRecoveryManager.java @@ -74,4 +74,14 @@ public interface IDataSetStorageRecoveryManager int getMaximumRertyCount(); void setMaximumRertyCount(int maxRetryCount); + + /** + * get's the minimum time period that must pass before the next retry will happen. + */ + int getRetryPeriodInSeconds(); + + /** + * set's the minimum time period that must pass before the next retry will happen. + */ + void setRetryPeriodInSeconds(int retryTimeInSeconds); } \ No newline at end of file diff --git a/datastore_server/sourceTest/java/ch/systemsx/cisd/etlserver/registrator/AbstractJythonDataSetHandlerTest.java b/datastore_server/sourceTest/java/ch/systemsx/cisd/etlserver/registrator/AbstractJythonDataSetHandlerTest.java index 6354ae55f06552922dc7c479b7a28f0a24c6d950..148c1836528be66fd04e6118aeeb8801c0ed440c 100644 --- a/datastore_server/sourceTest/java/ch/systemsx/cisd/etlserver/registrator/AbstractJythonDataSetHandlerTest.java +++ b/datastore_server/sourceTest/java/ch/systemsx/cisd/etlserver/registrator/AbstractJythonDataSetHandlerTest.java @@ -216,6 +216,7 @@ public abstract class AbstractJythonDataSetHandlerTest extends AbstractFileSyste one(storageRecoveryManager).setRecoveryMarkerFilesDir(new File( new File(workingDirectory, "recovery-marker"), "jython-handler-test")); one(storageRecoveryManager).setMaximumRertyCount(with(any(Integer.class))); + one(storageRecoveryManager).setRetryPeriodInSeconds(with(any(Integer.class))); } }); } diff --git a/datastore_server/sourceTest/java/ch/systemsx/cisd/etlserver/registrator/JythonDropboxRecoveryTest.java b/datastore_server/sourceTest/java/ch/systemsx/cisd/etlserver/registrator/JythonDropboxRecoveryTest.java index 9d1d7e4811fc9923f62fd8268ae7c202558a8df0..709559e10250d237c622a0539336570105660d8c 100644 --- a/datastore_server/sourceTest/java/ch/systemsx/cisd/etlserver/registrator/JythonDropboxRecoveryTest.java +++ b/datastore_server/sourceTest/java/ch/systemsx/cisd/etlserver/registrator/JythonDropboxRecoveryTest.java @@ -20,6 +20,8 @@ import static ch.systemsx.cisd.common.Constants.IS_FINISHED_PREFIX; import java.io.File; import java.util.Arrays; +import java.util.Calendar; +import java.util.Date; import java.util.HashMap; import java.util.LinkedList; import java.util.List; @@ -79,12 +81,17 @@ public class JythonDropboxRecoveryTest extends AbstractJythonDataSetHandlerTest testCase.registrationCheckResult = RegistrationCheckResult.CHECK_FAILED; testCase.recoveryRertyCount = 3; testCases.add(testCase); - + + testCase = new RecoveryTestCase("check retry count incremented"); + testCase.recoveryLastTry = new Date(); + testCase.nextTryInTheFuture = true; + testCases.add(testCase); + testCase = new RecoveryTestCase("check failure after the recovery count exceeded"); testCase.registrationCheckResult = RegistrationCheckResult.CHECK_FAILED_GIVE_UP; testCase.recoveryRertyCount = 100; testCases.add(testCase); - + testCase = new RecoveryTestCase("basic unrecoverable"); testCase.canRecoverFromError = false; testCases.add(testCase); @@ -139,11 +146,28 @@ public class JythonDropboxRecoveryTest extends AbstractJythonDataSetHandlerTest */ protected int recoveryRertyCount = 0; + /** + * the timestamp of a recovery last try. + */ + // gets initialized to some date in the past in the constructor + protected Date recoveryLastTry; + + /** + * if set to true, then the registration should do nothing + */ + protected boolean nextTryInTheFuture = false; + private RecoveryTestCase(String title) { this.title = title; this.overrideProperties = new HashMap<String, String>(); this.overrideProperties.put("TEST_V2_API", ""); + + // this the ultra-awkward way of initializing the recovery last try to some date in the + // past + Calendar c = Calendar.getInstance(); + c.set(2010, 1, 1); + recoveryLastTry = c.getTime(); } @Override @@ -176,72 +200,93 @@ public class JythonDropboxRecoveryTest extends AbstractJythonDataSetHandlerTest if (testCase.canRecoverFromError) { - if (testCase.recoveryRertyCount > 0) - { - setTheRertyCount(testCase.recoveryRertyCount); - } + setTheRecoveryInfo(testCase.recoveryRertyCount, testCase.recoveryLastTry); - assertRecoveryFile(testCase.recoveryRertyCount); + assertRecoveryFile(testCase.recoveryRertyCount, RecoveryInfoDateConstraint.ORIGINAL, + testCase.recoveryLastTry); assertOriginalMarkerFileExists(); + assertDirNotEmpty(precommitDirectory, "Precommit directory should not be empty"); handler.handle(markerFile); - // if failure happened here then don't expect recovery / marker files to be deleted - - switch (testCase.registrationCheckResult) + if (testCase.nextTryInTheFuture) { - case REGISTRATION_SUCCEEDED: - // item in store - assertStorageProcess(atomicatOperationDetails.recordedObject(), DATA_SET_CODE, - "sub_data_set_1", 0); - // FIXME: this is commented out to cover the bug! beware - // assertDirEmpty(stagingDirectory); - assertDirEmpty(precommitDirectory); - - assertNoOriginalMarkerFileExists(); - assertNoRecoveryMarkerFile(); - break; - case REGISTRATION_FAILED: - assertDataSetNotStoredProcess(DATA_SET_CODE); - assertDirEmpty(stagingDirectory); + assertRecoveryFile(testCase.recoveryRertyCount, + RecoveryInfoDateConstraint.ORIGINAL, testCase.recoveryLastTry); + assertOriginalMarkerFileExists(); + assertDirNotEmpty(precommitDirectory, "Precommit directory should not be empty"); + // nothing happened + } else + { + assertPostRecoveryConstraints(testCase, atomicatOperationDetails); + } + } else + { + assertNoRecoveryTriggeredConstraints(); + } + } - // FIXME: this is commented out to cover the bug! beware - // assertDirEmpty(precommitDirectory); + private void assertNoRecoveryTriggeredConstraints() + { + assertDataSetNotStoredProcess(DATA_SET_CODE); - assertNoOriginalMarkerFileExists(); - assertNoRecoveryMarkerFile(); - break; - case CHECK_FAILED: - assertDataSetNotStoredProcess(DATA_SET_CODE); - - assertDirNotEmpty(precommitDirectory, "Precommit directory should not be empty"); - assertRecoveryFile(testCase.recoveryRertyCount + 1); - assertOriginalMarkerFileExists(); - // marker file is still there - // recovery state file is still there - break; - case CHECK_FAILED_GIVE_UP: - assertDataSetNotStoredProcess(DATA_SET_CODE); - assertNoOriginalMarkerFileExists(); - assertNoRecoveryMarkerFile(); - - assertDirNotEmpty(precommitDirectory, "precommit should not be empty"); + assertNoOriginalMarkerFileExists(); + assertNoRecoveryMarkerFile(); - break; - } - } else + assertDirEmpty(stagingDirectory); + // FIXME: this check is commented out because of a bug! + // assertDirEmpty(precommitDirectory); + + // assert there is no recovery file + // rolllback requirementes + } + + private void assertPostRecoveryConstraints( + final RecoveryTestCase testCase, + final RecordingMatcher<ch.systemsx.cisd.openbis.generic.shared.dto.AtomicEntityOperationDetails> atomicatOperationDetails) + { + + switch (testCase.registrationCheckResult) { - assertDataSetNotStoredProcess(DATA_SET_CODE); - - assertNoOriginalMarkerFileExists(); - assertNoRecoveryMarkerFile(); - - assertDirEmpty(stagingDirectory); - //FIXME: this check is commented out because of a bug! - //assertDirEmpty(precommitDirectory); - - // assert there is no recovery file - // rolllback requirementes + case REGISTRATION_SUCCEEDED: + // item in store + assertStorageProcess(atomicatOperationDetails.recordedObject(), DATA_SET_CODE, + "sub_data_set_1", 0); + // FIXME: this is commented out to cover the bug! beware + // assertDirEmpty(stagingDirectory); + assertDirEmpty(precommitDirectory); + + assertNoOriginalMarkerFileExists(); + assertNoRecoveryMarkerFile(); + break; + case REGISTRATION_FAILED: + assertDataSetNotStoredProcess(DATA_SET_CODE); + assertDirEmpty(stagingDirectory); + + // FIXME: this is commented out to cover the bug! beware + // assertDirEmpty(precommitDirectory); + + assertNoOriginalMarkerFileExists(); + assertNoRecoveryMarkerFile(); + break; + case CHECK_FAILED: + assertDataSetNotStoredProcess(DATA_SET_CODE); + + assertDirNotEmpty(precommitDirectory, "Precommit directory should not be empty"); + assertRecoveryFile(testCase.recoveryRertyCount + 1, + RecoveryInfoDateConstraint.AFTER_ORIGINAL, testCase.recoveryLastTry); + assertOriginalMarkerFileExists(); + // marker file is still there + // recovery state file is still there + break; + case CHECK_FAILED_GIVE_UP: + assertDataSetNotStoredProcess(DATA_SET_CODE); + assertNoOriginalMarkerFileExists(); + assertNoRecoveryMarkerFile(); + + assertDirNotEmpty(precommitDirectory, "precommit should not be empty"); + + break; } } @@ -249,7 +294,7 @@ public class JythonDropboxRecoveryTest extends AbstractJythonDataSetHandlerTest /** * Use this method to update the retry count in the recovery info file. */ - private void setTheRertyCount(int count) + private void setTheRecoveryInfo(int count, Date lastTryDate) { File file = getCreatedRecoveryMarkerFile(); @@ -262,6 +307,9 @@ public class JythonDropboxRecoveryTest extends AbstractJythonDataSetHandlerTest { recoveryInfo.increaseTryCount(); } + + recoveryInfo.setLastTry(lastTryDate); + recoveryInfo.writeToFile(file); } @@ -289,10 +337,23 @@ public class JythonDropboxRecoveryTest extends AbstractJythonDataSetHandlerTest markerFile.exists()); } + private enum RecoveryInfoDateConstraint + { + /** + * the last try date is the one of the original recovery file + */ + ORIGINAL, + /** + * the last try date is later then the original recovery file + */ + AFTER_ORIGINAL + } + /** * @param tryCount - the excepted stored number of tries in a recovery file */ - private File assertRecoveryFile(int tryCount) + private File assertRecoveryFile(int tryCount, RecoveryInfoDateConstraint dateConstraint, + Date originalLastTryDate) { File file = getCreatedRecoveryMarkerFile(); assertTrue("The recovery marker file does not exist! " + file, file.exists()); @@ -306,6 +367,18 @@ public class JythonDropboxRecoveryTest extends AbstractJythonDataSetHandlerTest assertEquals("The try count in a recovery file is incorrect", tryCount, recoveryInfo.getTryCount()); + switch (dateConstraint) + { + case ORIGINAL: + assertEquals(originalLastTryDate, recoveryInfo.getLastTry()); + break; + case AFTER_ORIGINAL: + assertTrue( + "" + originalLastTryDate + " should be before " + recoveryInfo.getLastTry(), + originalLastTryDate.before(recoveryInfo.getLastTry())); + break; + } + return file; } @@ -353,7 +426,7 @@ public class JythonDropboxRecoveryTest extends AbstractJythonDataSetHandlerTest // now registration has failed with the exception. we continue depending on where - if (testCase.canRecoverFromError) + if (testCase.canRecoverFromError && false == testCase.nextTryInTheFuture) { checkRegistrationSucceeded();