Skip to content
Snippets Groups Projects
Commit cd4875ce authored by jakubs's avatar jakubs
Browse files

SP-45, BIS-21 implement waiting for a specified retry period, before trying another retry

SVN: 25310
parent e1034b73
No related branches found
No related tags found
No related merge requests found
......@@ -155,6 +155,7 @@ public class TopLevelDataSetRegistratorGlobalState
this.storageRecoveryManager.setDropboxRecoveryStateDir(dropboxRecoveryStateDir);
this.storageRecoveryManager.setRecoveryMarkerFilesDir(recoveryMarkerFilesDirectory);
this.storageRecoveryManager.setMaximumRertyCount(getMaximumRecoveryCount(threadParameters.getThreadProperties()));
this.storageRecoveryManager.setRetryPeriodInSeconds(getMinimumRecoveryPeriod(threadParameters.getThreadProperties()));
// Initialize the DSS Registration Log Directory
new DssRegistrationLogDirectoryHelper(dssRegistrationLogDir).initializeSubdirectories();
......@@ -309,6 +310,8 @@ public class TopLevelDataSetRegistratorGlobalState
public static final String RECOVERY_MAX_RETRY_COUNT = "recovery-max-retry-count";
public static final String RECOVERY_MIN_RETRY_PERIOD = "recovery-min-retry-period";
private static File getStagingDir(File storeRoot, String shareId, Properties threadProperties)
{
return getShareLocalDir(storeRoot, shareId, threadProperties, STAGING_DIR, "staging");
......@@ -337,6 +340,10 @@ public class TopLevelDataSetRegistratorGlobalState
return PropertyUtils.getInt(threadProperties, RECOVERY_MAX_RETRY_COUNT, 50);
}
private static int getMinimumRecoveryPeriod(Properties threadProperties)
{
return PropertyUtils.getInt(threadProperties, RECOVERY_MIN_RETRY_PERIOD, 60);
}
/**
* Get a directory local to the share, respecting the user override, if one is specified, and
* defaulting to the defaultDirName.
......
......@@ -21,6 +21,8 @@ import static ch.systemsx.cisd.etlserver.ThreadParameters.ON_ERROR_DECISION_KEY;
import java.io.File;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.List;
import java.util.Properties;
import java.util.concurrent.locks.Lock;
......@@ -494,6 +496,12 @@ public abstract class AbstractOmniscientTopLevelDataSetRegistrator<T extends Dat
throw new IllegalStateException("Recovery file " + recoveryFile + " doesn't exist");
}
if (false == retryPeriodHasPassed(recoveryInfo))
{
operationLog.info("Found recovery inforation for "+incomingFileOriginal+". The recovery won't happen as the retry period has not yet passed" );
return;
}
operationLog.info("will recover from broken registration. Found marker file "
+ recoveryMarkerFile + " and " + recoveryFile);
......@@ -534,6 +542,7 @@ public abstract class AbstractOmniscientTopLevelDataSetRegistrator<T extends Dat
// corrupt the recoveryMarkerFile?)
recoveryInfo.increaseTryCount();
recoveryInfo.setLastTry(new Date());
recoveryInfo.writeToFile(recoveryMarkerFile);
}
}
......@@ -561,12 +570,23 @@ public abstract class AbstractOmniscientTopLevelDataSetRegistrator<T extends Dat
handleRecoveryState(recoveryState, cleanupAction, recoveryMarkerFileCleanupAction);
}
/**
* Check wheter the last retry + retry period < date.now
*/
private boolean retryPeriodHasPassed(final DataSetStorageRecoveryInfo recoveryInfo)
{
Calendar c = Calendar.getInstance();
c.setTime(recoveryInfo.getLastTry());
c.add(Calendar.SECOND, state.getGlobalState().getStorageRecoveryManager()
.getRetryPeriodInSeconds());
return c.getTime().before(new Date());
}
private void handleRecoveryState(DataSetStoragePrecommitRecoveryState<T> recoveryState,
final IDelegatedActionWithResult<Boolean> cleanAfterwardsAction,
final IDelegatedActionWithResult<Boolean> recoveryMarkerCleanup)
{
// TODO: Jobs left to do here:
// rollback
// jython hooks
DssRegistrationLogger logger = recoveryState.getRegistrationLogger(state);
......
......@@ -36,7 +36,7 @@ public class DataSetStorageRecoveryInfo implements Serializable
private final File recoveryStateFile;
private final Date lastTry;
private Date lastTry;
private int tryCount;
......@@ -53,6 +53,11 @@ public class DataSetStorageRecoveryInfo implements Serializable
tryCount++;
}
public void setLastTry(Date lastTry)
{
this.lastTry = lastTry;
}
public File getRecoveryStateFile()
{
return recoveryStateFile;
......
......@@ -47,6 +47,8 @@ public class DataSetStorageRecoveryManager implements IDataSetStorageRecoveryMan
private int maxRetryCount = 50;
private int retryPeriodInSeconds = 60;
public <T extends DataSetInformation> void checkpointPrecommittedState(
DataSetStorageAlgorithmRunner<T> runner)
{
......@@ -161,4 +163,16 @@ public class DataSetStorageRecoveryManager implements IDataSetStorageRecoveryMan
{
return this.maxRetryCount;
}
public int getRetryPeriodInSeconds()
{
return retryPeriodInSeconds;
}
public void setRetryPeriodInSeconds(int retryPeriodInSeconds)
{
this.retryPeriodInSeconds = retryPeriodInSeconds;
}
}
......@@ -74,4 +74,14 @@ public interface IDataSetStorageRecoveryManager
int getMaximumRertyCount();
void setMaximumRertyCount(int maxRetryCount);
/**
* get's the minimum time period that must pass before the next retry will happen.
*/
int getRetryPeriodInSeconds();
/**
* set's the minimum time period that must pass before the next retry will happen.
*/
void setRetryPeriodInSeconds(int retryTimeInSeconds);
}
\ No newline at end of file
......@@ -216,6 +216,7 @@ public abstract class AbstractJythonDataSetHandlerTest extends AbstractFileSyste
one(storageRecoveryManager).setRecoveryMarkerFilesDir(new File(
new File(workingDirectory, "recovery-marker"), "jython-handler-test"));
one(storageRecoveryManager).setMaximumRertyCount(with(any(Integer.class)));
one(storageRecoveryManager).setRetryPeriodInSeconds(with(any(Integer.class)));
}
});
}
......
......@@ -20,6 +20,8 @@ import static ch.systemsx.cisd.common.Constants.IS_FINISHED_PREFIX;
import java.io.File;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Date;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
......@@ -79,12 +81,17 @@ public class JythonDropboxRecoveryTest extends AbstractJythonDataSetHandlerTest
testCase.registrationCheckResult = RegistrationCheckResult.CHECK_FAILED;
testCase.recoveryRertyCount = 3;
testCases.add(testCase);
testCase = new RecoveryTestCase("check retry count incremented");
testCase.recoveryLastTry = new Date();
testCase.nextTryInTheFuture = true;
testCases.add(testCase);
testCase = new RecoveryTestCase("check failure after the recovery count exceeded");
testCase.registrationCheckResult = RegistrationCheckResult.CHECK_FAILED_GIVE_UP;
testCase.recoveryRertyCount = 100;
testCases.add(testCase);
testCase = new RecoveryTestCase("basic unrecoverable");
testCase.canRecoverFromError = false;
testCases.add(testCase);
......@@ -139,11 +146,28 @@ public class JythonDropboxRecoveryTest extends AbstractJythonDataSetHandlerTest
*/
protected int recoveryRertyCount = 0;
/**
* the timestamp of a recovery last try.
*/
// gets initialized to some date in the past in the constructor
protected Date recoveryLastTry;
/**
* if set to true, then the registration should do nothing
*/
protected boolean nextTryInTheFuture = false;
private RecoveryTestCase(String title)
{
this.title = title;
this.overrideProperties = new HashMap<String, String>();
this.overrideProperties.put("TEST_V2_API", "");
// this the ultra-awkward way of initializing the recovery last try to some date in the
// past
Calendar c = Calendar.getInstance();
c.set(2010, 1, 1);
recoveryLastTry = c.getTime();
}
@Override
......@@ -176,72 +200,93 @@ public class JythonDropboxRecoveryTest extends AbstractJythonDataSetHandlerTest
if (testCase.canRecoverFromError)
{
if (testCase.recoveryRertyCount > 0)
{
setTheRertyCount(testCase.recoveryRertyCount);
}
setTheRecoveryInfo(testCase.recoveryRertyCount, testCase.recoveryLastTry);
assertRecoveryFile(testCase.recoveryRertyCount);
assertRecoveryFile(testCase.recoveryRertyCount, RecoveryInfoDateConstraint.ORIGINAL,
testCase.recoveryLastTry);
assertOriginalMarkerFileExists();
assertDirNotEmpty(precommitDirectory, "Precommit directory should not be empty");
handler.handle(markerFile);
// if failure happened here then don't expect recovery / marker files to be deleted
switch (testCase.registrationCheckResult)
if (testCase.nextTryInTheFuture)
{
case REGISTRATION_SUCCEEDED:
// item in store
assertStorageProcess(atomicatOperationDetails.recordedObject(), DATA_SET_CODE,
"sub_data_set_1", 0);
// FIXME: this is commented out to cover the bug! beware
// assertDirEmpty(stagingDirectory);
assertDirEmpty(precommitDirectory);
assertNoOriginalMarkerFileExists();
assertNoRecoveryMarkerFile();
break;
case REGISTRATION_FAILED:
assertDataSetNotStoredProcess(DATA_SET_CODE);
assertDirEmpty(stagingDirectory);
assertRecoveryFile(testCase.recoveryRertyCount,
RecoveryInfoDateConstraint.ORIGINAL, testCase.recoveryLastTry);
assertOriginalMarkerFileExists();
assertDirNotEmpty(precommitDirectory, "Precommit directory should not be empty");
// nothing happened
} else
{
assertPostRecoveryConstraints(testCase, atomicatOperationDetails);
}
} else
{
assertNoRecoveryTriggeredConstraints();
}
}
// FIXME: this is commented out to cover the bug! beware
// assertDirEmpty(precommitDirectory);
private void assertNoRecoveryTriggeredConstraints()
{
assertDataSetNotStoredProcess(DATA_SET_CODE);
assertNoOriginalMarkerFileExists();
assertNoRecoveryMarkerFile();
break;
case CHECK_FAILED:
assertDataSetNotStoredProcess(DATA_SET_CODE);
assertDirNotEmpty(precommitDirectory, "Precommit directory should not be empty");
assertRecoveryFile(testCase.recoveryRertyCount + 1);
assertOriginalMarkerFileExists();
// marker file is still there
// recovery state file is still there
break;
case CHECK_FAILED_GIVE_UP:
assertDataSetNotStoredProcess(DATA_SET_CODE);
assertNoOriginalMarkerFileExists();
assertNoRecoveryMarkerFile();
assertDirNotEmpty(precommitDirectory, "precommit should not be empty");
assertNoOriginalMarkerFileExists();
assertNoRecoveryMarkerFile();
break;
}
} else
assertDirEmpty(stagingDirectory);
// FIXME: this check is commented out because of a bug!
// assertDirEmpty(precommitDirectory);
// assert there is no recovery file
// rolllback requirementes
}
private void assertPostRecoveryConstraints(
final RecoveryTestCase testCase,
final RecordingMatcher<ch.systemsx.cisd.openbis.generic.shared.dto.AtomicEntityOperationDetails> atomicatOperationDetails)
{
switch (testCase.registrationCheckResult)
{
assertDataSetNotStoredProcess(DATA_SET_CODE);
assertNoOriginalMarkerFileExists();
assertNoRecoveryMarkerFile();
assertDirEmpty(stagingDirectory);
//FIXME: this check is commented out because of a bug!
//assertDirEmpty(precommitDirectory);
// assert there is no recovery file
// rolllback requirementes
case REGISTRATION_SUCCEEDED:
// item in store
assertStorageProcess(atomicatOperationDetails.recordedObject(), DATA_SET_CODE,
"sub_data_set_1", 0);
// FIXME: this is commented out to cover the bug! beware
// assertDirEmpty(stagingDirectory);
assertDirEmpty(precommitDirectory);
assertNoOriginalMarkerFileExists();
assertNoRecoveryMarkerFile();
break;
case REGISTRATION_FAILED:
assertDataSetNotStoredProcess(DATA_SET_CODE);
assertDirEmpty(stagingDirectory);
// FIXME: this is commented out to cover the bug! beware
// assertDirEmpty(precommitDirectory);
assertNoOriginalMarkerFileExists();
assertNoRecoveryMarkerFile();
break;
case CHECK_FAILED:
assertDataSetNotStoredProcess(DATA_SET_CODE);
assertDirNotEmpty(precommitDirectory, "Precommit directory should not be empty");
assertRecoveryFile(testCase.recoveryRertyCount + 1,
RecoveryInfoDateConstraint.AFTER_ORIGINAL, testCase.recoveryLastTry);
assertOriginalMarkerFileExists();
// marker file is still there
// recovery state file is still there
break;
case CHECK_FAILED_GIVE_UP:
assertDataSetNotStoredProcess(DATA_SET_CODE);
assertNoOriginalMarkerFileExists();
assertNoRecoveryMarkerFile();
assertDirNotEmpty(precommitDirectory, "precommit should not be empty");
break;
}
}
......@@ -249,7 +294,7 @@ public class JythonDropboxRecoveryTest extends AbstractJythonDataSetHandlerTest
/**
* Use this method to update the retry count in the recovery info file.
*/
private void setTheRertyCount(int count)
private void setTheRecoveryInfo(int count, Date lastTryDate)
{
File file = getCreatedRecoveryMarkerFile();
......@@ -262,6 +307,9 @@ public class JythonDropboxRecoveryTest extends AbstractJythonDataSetHandlerTest
{
recoveryInfo.increaseTryCount();
}
recoveryInfo.setLastTry(lastTryDate);
recoveryInfo.writeToFile(file);
}
......@@ -289,10 +337,23 @@ public class JythonDropboxRecoveryTest extends AbstractJythonDataSetHandlerTest
markerFile.exists());
}
private enum RecoveryInfoDateConstraint
{
/**
* the last try date is the one of the original recovery file
*/
ORIGINAL,
/**
* the last try date is later then the original recovery file
*/
AFTER_ORIGINAL
}
/**
* @param tryCount - the excepted stored number of tries in a recovery file
*/
private File assertRecoveryFile(int tryCount)
private File assertRecoveryFile(int tryCount, RecoveryInfoDateConstraint dateConstraint,
Date originalLastTryDate)
{
File file = getCreatedRecoveryMarkerFile();
assertTrue("The recovery marker file does not exist! " + file, file.exists());
......@@ -306,6 +367,18 @@ public class JythonDropboxRecoveryTest extends AbstractJythonDataSetHandlerTest
assertEquals("The try count in a recovery file is incorrect", tryCount,
recoveryInfo.getTryCount());
switch (dateConstraint)
{
case ORIGINAL:
assertEquals(originalLastTryDate, recoveryInfo.getLastTry());
break;
case AFTER_ORIGINAL:
assertTrue(
"" + originalLastTryDate + " should be before " + recoveryInfo.getLastTry(),
originalLastTryDate.before(recoveryInfo.getLastTry()));
break;
}
return file;
}
......@@ -353,7 +426,7 @@ public class JythonDropboxRecoveryTest extends AbstractJythonDataSetHandlerTest
// now registration has failed with the exception. we continue depending on where
if (testCase.canRecoverFromError)
if (testCase.canRecoverFromError && false == testCase.nextTryInTheFuture)
{
checkRegistrationSucceeded();
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment