Skip to content
Snippets Groups Projects
Commit cd4875ce authored by jakubs's avatar jakubs
Browse files

SP-45, BIS-21 implement waiting for a specified retry period, before trying another retry

SVN: 25310
parent e1034b73
No related branches found
No related tags found
No related merge requests found
...@@ -155,6 +155,7 @@ public class TopLevelDataSetRegistratorGlobalState ...@@ -155,6 +155,7 @@ public class TopLevelDataSetRegistratorGlobalState
this.storageRecoveryManager.setDropboxRecoveryStateDir(dropboxRecoveryStateDir); this.storageRecoveryManager.setDropboxRecoveryStateDir(dropboxRecoveryStateDir);
this.storageRecoveryManager.setRecoveryMarkerFilesDir(recoveryMarkerFilesDirectory); this.storageRecoveryManager.setRecoveryMarkerFilesDir(recoveryMarkerFilesDirectory);
this.storageRecoveryManager.setMaximumRertyCount(getMaximumRecoveryCount(threadParameters.getThreadProperties())); this.storageRecoveryManager.setMaximumRertyCount(getMaximumRecoveryCount(threadParameters.getThreadProperties()));
this.storageRecoveryManager.setRetryPeriodInSeconds(getMinimumRecoveryPeriod(threadParameters.getThreadProperties()));
// Initialize the DSS Registration Log Directory // Initialize the DSS Registration Log Directory
new DssRegistrationLogDirectoryHelper(dssRegistrationLogDir).initializeSubdirectories(); new DssRegistrationLogDirectoryHelper(dssRegistrationLogDir).initializeSubdirectories();
...@@ -309,6 +310,8 @@ public class TopLevelDataSetRegistratorGlobalState ...@@ -309,6 +310,8 @@ public class TopLevelDataSetRegistratorGlobalState
public static final String RECOVERY_MAX_RETRY_COUNT = "recovery-max-retry-count"; public static final String RECOVERY_MAX_RETRY_COUNT = "recovery-max-retry-count";
public static final String RECOVERY_MIN_RETRY_PERIOD = "recovery-min-retry-period";
private static File getStagingDir(File storeRoot, String shareId, Properties threadProperties) private static File getStagingDir(File storeRoot, String shareId, Properties threadProperties)
{ {
return getShareLocalDir(storeRoot, shareId, threadProperties, STAGING_DIR, "staging"); return getShareLocalDir(storeRoot, shareId, threadProperties, STAGING_DIR, "staging");
...@@ -337,6 +340,10 @@ public class TopLevelDataSetRegistratorGlobalState ...@@ -337,6 +340,10 @@ public class TopLevelDataSetRegistratorGlobalState
return PropertyUtils.getInt(threadProperties, RECOVERY_MAX_RETRY_COUNT, 50); return PropertyUtils.getInt(threadProperties, RECOVERY_MAX_RETRY_COUNT, 50);
} }
private static int getMinimumRecoveryPeriod(Properties threadProperties)
{
return PropertyUtils.getInt(threadProperties, RECOVERY_MIN_RETRY_PERIOD, 60);
}
/** /**
* Get a directory local to the share, respecting the user override, if one is specified, and * Get a directory local to the share, respecting the user override, if one is specified, and
* defaulting to the defaultDirName. * defaulting to the defaultDirName.
......
...@@ -21,6 +21,8 @@ import static ch.systemsx.cisd.etlserver.ThreadParameters.ON_ERROR_DECISION_KEY; ...@@ -21,6 +21,8 @@ import static ch.systemsx.cisd.etlserver.ThreadParameters.ON_ERROR_DECISION_KEY;
import java.io.File; import java.io.File;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.List; import java.util.List;
import java.util.Properties; import java.util.Properties;
import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.Lock;
...@@ -494,6 +496,12 @@ public abstract class AbstractOmniscientTopLevelDataSetRegistrator<T extends Dat ...@@ -494,6 +496,12 @@ public abstract class AbstractOmniscientTopLevelDataSetRegistrator<T extends Dat
throw new IllegalStateException("Recovery file " + recoveryFile + " doesn't exist"); throw new IllegalStateException("Recovery file " + recoveryFile + " doesn't exist");
} }
if (false == retryPeriodHasPassed(recoveryInfo))
{
operationLog.info("Found recovery inforation for "+incomingFileOriginal+". The recovery won't happen as the retry period has not yet passed" );
return;
}
operationLog.info("will recover from broken registration. Found marker file " operationLog.info("will recover from broken registration. Found marker file "
+ recoveryMarkerFile + " and " + recoveryFile); + recoveryMarkerFile + " and " + recoveryFile);
...@@ -534,6 +542,7 @@ public abstract class AbstractOmniscientTopLevelDataSetRegistrator<T extends Dat ...@@ -534,6 +542,7 @@ public abstract class AbstractOmniscientTopLevelDataSetRegistrator<T extends Dat
// corrupt the recoveryMarkerFile?) // corrupt the recoveryMarkerFile?)
recoveryInfo.increaseTryCount(); recoveryInfo.increaseTryCount();
recoveryInfo.setLastTry(new Date());
recoveryInfo.writeToFile(recoveryMarkerFile); recoveryInfo.writeToFile(recoveryMarkerFile);
} }
} }
...@@ -561,12 +570,23 @@ public abstract class AbstractOmniscientTopLevelDataSetRegistrator<T extends Dat ...@@ -561,12 +570,23 @@ public abstract class AbstractOmniscientTopLevelDataSetRegistrator<T extends Dat
handleRecoveryState(recoveryState, cleanupAction, recoveryMarkerFileCleanupAction); handleRecoveryState(recoveryState, cleanupAction, recoveryMarkerFileCleanupAction);
} }
/**
* Check wheter the last retry + retry period < date.now
*/
private boolean retryPeriodHasPassed(final DataSetStorageRecoveryInfo recoveryInfo)
{
Calendar c = Calendar.getInstance();
c.setTime(recoveryInfo.getLastTry());
c.add(Calendar.SECOND, state.getGlobalState().getStorageRecoveryManager()
.getRetryPeriodInSeconds());
return c.getTime().before(new Date());
}
private void handleRecoveryState(DataSetStoragePrecommitRecoveryState<T> recoveryState, private void handleRecoveryState(DataSetStoragePrecommitRecoveryState<T> recoveryState,
final IDelegatedActionWithResult<Boolean> cleanAfterwardsAction, final IDelegatedActionWithResult<Boolean> cleanAfterwardsAction,
final IDelegatedActionWithResult<Boolean> recoveryMarkerCleanup) final IDelegatedActionWithResult<Boolean> recoveryMarkerCleanup)
{ {
// TODO: Jobs left to do here: // TODO: Jobs left to do here:
// rollback
// jython hooks // jython hooks
DssRegistrationLogger logger = recoveryState.getRegistrationLogger(state); DssRegistrationLogger logger = recoveryState.getRegistrationLogger(state);
......
...@@ -36,7 +36,7 @@ public class DataSetStorageRecoveryInfo implements Serializable ...@@ -36,7 +36,7 @@ public class DataSetStorageRecoveryInfo implements Serializable
private final File recoveryStateFile; private final File recoveryStateFile;
private final Date lastTry; private Date lastTry;
private int tryCount; private int tryCount;
...@@ -53,6 +53,11 @@ public class DataSetStorageRecoveryInfo implements Serializable ...@@ -53,6 +53,11 @@ public class DataSetStorageRecoveryInfo implements Serializable
tryCount++; tryCount++;
} }
public void setLastTry(Date lastTry)
{
this.lastTry = lastTry;
}
public File getRecoveryStateFile() public File getRecoveryStateFile()
{ {
return recoveryStateFile; return recoveryStateFile;
......
...@@ -47,6 +47,8 @@ public class DataSetStorageRecoveryManager implements IDataSetStorageRecoveryMan ...@@ -47,6 +47,8 @@ public class DataSetStorageRecoveryManager implements IDataSetStorageRecoveryMan
private int maxRetryCount = 50; private int maxRetryCount = 50;
private int retryPeriodInSeconds = 60;
public <T extends DataSetInformation> void checkpointPrecommittedState( public <T extends DataSetInformation> void checkpointPrecommittedState(
DataSetStorageAlgorithmRunner<T> runner) DataSetStorageAlgorithmRunner<T> runner)
{ {
...@@ -161,4 +163,16 @@ public class DataSetStorageRecoveryManager implements IDataSetStorageRecoveryMan ...@@ -161,4 +163,16 @@ public class DataSetStorageRecoveryManager implements IDataSetStorageRecoveryMan
{ {
return this.maxRetryCount; return this.maxRetryCount;
} }
public int getRetryPeriodInSeconds()
{
return retryPeriodInSeconds;
}
public void setRetryPeriodInSeconds(int retryPeriodInSeconds)
{
this.retryPeriodInSeconds = retryPeriodInSeconds;
}
} }
...@@ -74,4 +74,14 @@ public interface IDataSetStorageRecoveryManager ...@@ -74,4 +74,14 @@ public interface IDataSetStorageRecoveryManager
int getMaximumRertyCount(); int getMaximumRertyCount();
void setMaximumRertyCount(int maxRetryCount); void setMaximumRertyCount(int maxRetryCount);
/**
* get's the minimum time period that must pass before the next retry will happen.
*/
int getRetryPeriodInSeconds();
/**
* set's the minimum time period that must pass before the next retry will happen.
*/
void setRetryPeriodInSeconds(int retryTimeInSeconds);
} }
\ No newline at end of file
...@@ -216,6 +216,7 @@ public abstract class AbstractJythonDataSetHandlerTest extends AbstractFileSyste ...@@ -216,6 +216,7 @@ public abstract class AbstractJythonDataSetHandlerTest extends AbstractFileSyste
one(storageRecoveryManager).setRecoveryMarkerFilesDir(new File( one(storageRecoveryManager).setRecoveryMarkerFilesDir(new File(
new File(workingDirectory, "recovery-marker"), "jython-handler-test")); new File(workingDirectory, "recovery-marker"), "jython-handler-test"));
one(storageRecoveryManager).setMaximumRertyCount(with(any(Integer.class))); one(storageRecoveryManager).setMaximumRertyCount(with(any(Integer.class)));
one(storageRecoveryManager).setRetryPeriodInSeconds(with(any(Integer.class)));
} }
}); });
} }
......
...@@ -20,6 +20,8 @@ import static ch.systemsx.cisd.common.Constants.IS_FINISHED_PREFIX; ...@@ -20,6 +20,8 @@ import static ch.systemsx.cisd.common.Constants.IS_FINISHED_PREFIX;
import java.io.File; import java.io.File;
import java.util.Arrays; import java.util.Arrays;
import java.util.Calendar;
import java.util.Date;
import java.util.HashMap; import java.util.HashMap;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.List; import java.util.List;
...@@ -79,12 +81,17 @@ public class JythonDropboxRecoveryTest extends AbstractJythonDataSetHandlerTest ...@@ -79,12 +81,17 @@ public class JythonDropboxRecoveryTest extends AbstractJythonDataSetHandlerTest
testCase.registrationCheckResult = RegistrationCheckResult.CHECK_FAILED; testCase.registrationCheckResult = RegistrationCheckResult.CHECK_FAILED;
testCase.recoveryRertyCount = 3; testCase.recoveryRertyCount = 3;
testCases.add(testCase); testCases.add(testCase);
testCase = new RecoveryTestCase("check retry count incremented");
testCase.recoveryLastTry = new Date();
testCase.nextTryInTheFuture = true;
testCases.add(testCase);
testCase = new RecoveryTestCase("check failure after the recovery count exceeded"); testCase = new RecoveryTestCase("check failure after the recovery count exceeded");
testCase.registrationCheckResult = RegistrationCheckResult.CHECK_FAILED_GIVE_UP; testCase.registrationCheckResult = RegistrationCheckResult.CHECK_FAILED_GIVE_UP;
testCase.recoveryRertyCount = 100; testCase.recoveryRertyCount = 100;
testCases.add(testCase); testCases.add(testCase);
testCase = new RecoveryTestCase("basic unrecoverable"); testCase = new RecoveryTestCase("basic unrecoverable");
testCase.canRecoverFromError = false; testCase.canRecoverFromError = false;
testCases.add(testCase); testCases.add(testCase);
...@@ -139,11 +146,28 @@ public class JythonDropboxRecoveryTest extends AbstractJythonDataSetHandlerTest ...@@ -139,11 +146,28 @@ public class JythonDropboxRecoveryTest extends AbstractJythonDataSetHandlerTest
*/ */
protected int recoveryRertyCount = 0; protected int recoveryRertyCount = 0;
/**
* the timestamp of a recovery last try.
*/
// gets initialized to some date in the past in the constructor
protected Date recoveryLastTry;
/**
* if set to true, then the registration should do nothing
*/
protected boolean nextTryInTheFuture = false;
private RecoveryTestCase(String title) private RecoveryTestCase(String title)
{ {
this.title = title; this.title = title;
this.overrideProperties = new HashMap<String, String>(); this.overrideProperties = new HashMap<String, String>();
this.overrideProperties.put("TEST_V2_API", ""); this.overrideProperties.put("TEST_V2_API", "");
// this the ultra-awkward way of initializing the recovery last try to some date in the
// past
Calendar c = Calendar.getInstance();
c.set(2010, 1, 1);
recoveryLastTry = c.getTime();
} }
@Override @Override
...@@ -176,72 +200,93 @@ public class JythonDropboxRecoveryTest extends AbstractJythonDataSetHandlerTest ...@@ -176,72 +200,93 @@ public class JythonDropboxRecoveryTest extends AbstractJythonDataSetHandlerTest
if (testCase.canRecoverFromError) if (testCase.canRecoverFromError)
{ {
if (testCase.recoveryRertyCount > 0) setTheRecoveryInfo(testCase.recoveryRertyCount, testCase.recoveryLastTry);
{
setTheRertyCount(testCase.recoveryRertyCount);
}
assertRecoveryFile(testCase.recoveryRertyCount); assertRecoveryFile(testCase.recoveryRertyCount, RecoveryInfoDateConstraint.ORIGINAL,
testCase.recoveryLastTry);
assertOriginalMarkerFileExists(); assertOriginalMarkerFileExists();
assertDirNotEmpty(precommitDirectory, "Precommit directory should not be empty");
handler.handle(markerFile); handler.handle(markerFile);
// if failure happened here then don't expect recovery / marker files to be deleted if (testCase.nextTryInTheFuture)
switch (testCase.registrationCheckResult)
{ {
case REGISTRATION_SUCCEEDED: assertRecoveryFile(testCase.recoveryRertyCount,
// item in store RecoveryInfoDateConstraint.ORIGINAL, testCase.recoveryLastTry);
assertStorageProcess(atomicatOperationDetails.recordedObject(), DATA_SET_CODE, assertOriginalMarkerFileExists();
"sub_data_set_1", 0); assertDirNotEmpty(precommitDirectory, "Precommit directory should not be empty");
// FIXME: this is commented out to cover the bug! beware // nothing happened
// assertDirEmpty(stagingDirectory); } else
assertDirEmpty(precommitDirectory); {
assertPostRecoveryConstraints(testCase, atomicatOperationDetails);
assertNoOriginalMarkerFileExists(); }
assertNoRecoveryMarkerFile(); } else
break; {
case REGISTRATION_FAILED: assertNoRecoveryTriggeredConstraints();
assertDataSetNotStoredProcess(DATA_SET_CODE); }
assertDirEmpty(stagingDirectory); }
// FIXME: this is commented out to cover the bug! beware private void assertNoRecoveryTriggeredConstraints()
// assertDirEmpty(precommitDirectory); {
assertDataSetNotStoredProcess(DATA_SET_CODE);
assertNoOriginalMarkerFileExists(); assertNoOriginalMarkerFileExists();
assertNoRecoveryMarkerFile(); assertNoRecoveryMarkerFile();
break;
case CHECK_FAILED:
assertDataSetNotStoredProcess(DATA_SET_CODE);
assertDirNotEmpty(precommitDirectory, "Precommit directory should not be empty");
assertRecoveryFile(testCase.recoveryRertyCount + 1);
assertOriginalMarkerFileExists();
// marker file is still there
// recovery state file is still there
break;
case CHECK_FAILED_GIVE_UP:
assertDataSetNotStoredProcess(DATA_SET_CODE);
assertNoOriginalMarkerFileExists();
assertNoRecoveryMarkerFile();
assertDirNotEmpty(precommitDirectory, "precommit should not be empty");
break; assertDirEmpty(stagingDirectory);
} // FIXME: this check is commented out because of a bug!
} else // assertDirEmpty(precommitDirectory);
// assert there is no recovery file
// rolllback requirementes
}
private void assertPostRecoveryConstraints(
final RecoveryTestCase testCase,
final RecordingMatcher<ch.systemsx.cisd.openbis.generic.shared.dto.AtomicEntityOperationDetails> atomicatOperationDetails)
{
switch (testCase.registrationCheckResult)
{ {
assertDataSetNotStoredProcess(DATA_SET_CODE); case REGISTRATION_SUCCEEDED:
// item in store
assertNoOriginalMarkerFileExists(); assertStorageProcess(atomicatOperationDetails.recordedObject(), DATA_SET_CODE,
assertNoRecoveryMarkerFile(); "sub_data_set_1", 0);
// FIXME: this is commented out to cover the bug! beware
assertDirEmpty(stagingDirectory); // assertDirEmpty(stagingDirectory);
//FIXME: this check is commented out because of a bug! assertDirEmpty(precommitDirectory);
//assertDirEmpty(precommitDirectory);
assertNoOriginalMarkerFileExists();
// assert there is no recovery file assertNoRecoveryMarkerFile();
// rolllback requirementes break;
case REGISTRATION_FAILED:
assertDataSetNotStoredProcess(DATA_SET_CODE);
assertDirEmpty(stagingDirectory);
// FIXME: this is commented out to cover the bug! beware
// assertDirEmpty(precommitDirectory);
assertNoOriginalMarkerFileExists();
assertNoRecoveryMarkerFile();
break;
case CHECK_FAILED:
assertDataSetNotStoredProcess(DATA_SET_CODE);
assertDirNotEmpty(precommitDirectory, "Precommit directory should not be empty");
assertRecoveryFile(testCase.recoveryRertyCount + 1,
RecoveryInfoDateConstraint.AFTER_ORIGINAL, testCase.recoveryLastTry);
assertOriginalMarkerFileExists();
// marker file is still there
// recovery state file is still there
break;
case CHECK_FAILED_GIVE_UP:
assertDataSetNotStoredProcess(DATA_SET_CODE);
assertNoOriginalMarkerFileExists();
assertNoRecoveryMarkerFile();
assertDirNotEmpty(precommitDirectory, "precommit should not be empty");
break;
} }
} }
...@@ -249,7 +294,7 @@ public class JythonDropboxRecoveryTest extends AbstractJythonDataSetHandlerTest ...@@ -249,7 +294,7 @@ public class JythonDropboxRecoveryTest extends AbstractJythonDataSetHandlerTest
/** /**
* Use this method to update the retry count in the recovery info file. * Use this method to update the retry count in the recovery info file.
*/ */
private void setTheRertyCount(int count) private void setTheRecoveryInfo(int count, Date lastTryDate)
{ {
File file = getCreatedRecoveryMarkerFile(); File file = getCreatedRecoveryMarkerFile();
...@@ -262,6 +307,9 @@ public class JythonDropboxRecoveryTest extends AbstractJythonDataSetHandlerTest ...@@ -262,6 +307,9 @@ public class JythonDropboxRecoveryTest extends AbstractJythonDataSetHandlerTest
{ {
recoveryInfo.increaseTryCount(); recoveryInfo.increaseTryCount();
} }
recoveryInfo.setLastTry(lastTryDate);
recoveryInfo.writeToFile(file); recoveryInfo.writeToFile(file);
} }
...@@ -289,10 +337,23 @@ public class JythonDropboxRecoveryTest extends AbstractJythonDataSetHandlerTest ...@@ -289,10 +337,23 @@ public class JythonDropboxRecoveryTest extends AbstractJythonDataSetHandlerTest
markerFile.exists()); markerFile.exists());
} }
private enum RecoveryInfoDateConstraint
{
/**
* the last try date is the one of the original recovery file
*/
ORIGINAL,
/**
* the last try date is later then the original recovery file
*/
AFTER_ORIGINAL
}
/** /**
* @param tryCount - the excepted stored number of tries in a recovery file * @param tryCount - the excepted stored number of tries in a recovery file
*/ */
private File assertRecoveryFile(int tryCount) private File assertRecoveryFile(int tryCount, RecoveryInfoDateConstraint dateConstraint,
Date originalLastTryDate)
{ {
File file = getCreatedRecoveryMarkerFile(); File file = getCreatedRecoveryMarkerFile();
assertTrue("The recovery marker file does not exist! " + file, file.exists()); assertTrue("The recovery marker file does not exist! " + file, file.exists());
...@@ -306,6 +367,18 @@ public class JythonDropboxRecoveryTest extends AbstractJythonDataSetHandlerTest ...@@ -306,6 +367,18 @@ public class JythonDropboxRecoveryTest extends AbstractJythonDataSetHandlerTest
assertEquals("The try count in a recovery file is incorrect", tryCount, assertEquals("The try count in a recovery file is incorrect", tryCount,
recoveryInfo.getTryCount()); recoveryInfo.getTryCount());
switch (dateConstraint)
{
case ORIGINAL:
assertEquals(originalLastTryDate, recoveryInfo.getLastTry());
break;
case AFTER_ORIGINAL:
assertTrue(
"" + originalLastTryDate + " should be before " + recoveryInfo.getLastTry(),
originalLastTryDate.before(recoveryInfo.getLastTry()));
break;
}
return file; return file;
} }
...@@ -353,7 +426,7 @@ public class JythonDropboxRecoveryTest extends AbstractJythonDataSetHandlerTest ...@@ -353,7 +426,7 @@ public class JythonDropboxRecoveryTest extends AbstractJythonDataSetHandlerTest
// now registration has failed with the exception. we continue depending on where // now registration has failed with the exception. we continue depending on where
if (testCase.canRecoverFromError) if (testCase.canRecoverFromError && false == testCase.nextTryInTheFuture)
{ {
checkRegistrationSucceeded(); checkRegistrationSucceeded();
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment