diff --git a/common/source/java/ch/systemsx/cisd/common/parser/DefaultLineTokenizer.java b/common/source/java/ch/systemsx/cisd/common/parser/DefaultLineTokenizer.java index 3110a359c1c512fe5f012ca9417b745dc2b7642e..0ea5f3ec61e0db829ffcc79c07bc9b7ea29fbc6b 100644 --- a/common/source/java/ch/systemsx/cisd/common/parser/DefaultLineTokenizer.java +++ b/common/source/java/ch/systemsx/cisd/common/parser/DefaultLineTokenizer.java @@ -18,9 +18,10 @@ import org.apache.commons.lang.text.StrTokenizer; */ public class DefaultLineTokenizer implements ILineTokenizer { - + /** Allowed <code>Properties</code> keys. */ - public static enum PropertyKey { + public static enum PropertyKey + { /** The property key for setting some delimiter characters. */ SEPARATOR_CHARS, /** The property key for setting some quote characters. */ @@ -30,59 +31,108 @@ public class DefaultLineTokenizer implements ILineTokenizer /** The property key for setting some trimmer characters. */ TRIMMER_CHARS, } - + /** Some properties for this tab parser. */ private final Map<PropertyKey, String> properties; - + + /** The default <code>StrMatcher</code> for each <code>PropertyKey</code>. */ + private final static Map<PropertyKey, StrMatcher> defaultStrMatchers = createDefaultStrMatchers(); + + /** + * Original value is <code>null</code>. + * <p> + * If not <code>null</code> then we assume that {@link #init()} method has been called. + * </p> + */ private StrTokenizer tokenizer; - + public DefaultLineTokenizer() { this.properties = new EnumMap<PropertyKey, String>(PropertyKey.class); } + private static final Map<PropertyKey, StrMatcher> createDefaultStrMatchers() + { + EnumMap<PropertyKey, StrMatcher> map = new EnumMap<PropertyKey, StrMatcher>(PropertyKey.class); + map.put(PropertyKey.SEPARATOR_CHARS, StrMatcher.tabMatcher()); + map.put(PropertyKey.QUOTE_CHARS, StrMatcher.noneMatcher()); + map.put(PropertyKey.TRIMMER_CHARS, StrMatcher.trimMatcher()); + map.put(PropertyKey.IGNORED_CHARS, StrMatcher.noneMatcher()); + return map; + } + /** * Sets a property for this <code>TabReaderParser</code>. - * - * @throws IllegalArgumentException if given <code>key</code> could not found in {@link PropertyKey}. + * <p> + * Does nothing if given <code>key</code> is <code>null</code> and resets <code>key</code> to default value if + * given <code>value</code> is <code>null</code>. + * </p> */ public final void setProperty(PropertyKey key, String value) { + if (key == null) + { + return; + } + if (value == null) + { + properties.remove(key); + } properties.put(key, value); + if (tokenizer != null) + { + StrMatcher matcher = getStrMatcher(key); + if (key == PropertyKey.SEPARATOR_CHARS) + { + tokenizer.setDelimiterMatcher(matcher); + } else if (key == PropertyKey.QUOTE_CHARS) + { + tokenizer.setQuoteMatcher(matcher); + } else if (key == PropertyKey.TRIMMER_CHARS) + { + tokenizer.setTrimmerMatcher(matcher); + } else if (key == PropertyKey.IGNORED_CHARS) + { + tokenizer.setIgnoredMatcher(matcher); + } + } } - + /** Converts a defined <code>PropertyKey</code> into <code>StrMatcher</code>. */ - private final StrMatcher getStrMatcher(PropertyKey key, StrMatcher defaultMatcher) { - StrMatcher strMatcher = defaultMatcher; + private final StrMatcher getStrMatcher(PropertyKey key) + { + StrMatcher strMatcher = defaultStrMatchers.get(key); String value = properties.get(key); if (value != null) { + // Note that we use a set of characters (like <code>StringTokenizer</code>) does + // and not <code>StrMatcher.stringMatcher(value)</code> strMatcher = StrMatcher.charSetMatcher(value); } return strMatcher; } - + /////////////////////////////////////////////////////// // ILineTokenizer /////////////////////////////////////////////////////// - + public final void init() - { + { StrTokenizer strTokenizer = new StrTokenizer(); - strTokenizer.setDelimiterMatcher(getStrMatcher(PropertyKey.SEPARATOR_CHARS, StrMatcher.tabMatcher())); - strTokenizer.setQuoteMatcher(getStrMatcher(PropertyKey.QUOTE_CHARS, StrMatcher.noneMatcher())); - strTokenizer.setTrimmerMatcher(getStrMatcher(PropertyKey.TRIMMER_CHARS, StrMatcher.trimMatcher())); - strTokenizer.setIgnoredMatcher(getStrMatcher(PropertyKey.IGNORED_CHARS, StrMatcher.noneMatcher())); + strTokenizer.setDelimiterMatcher(getStrMatcher(PropertyKey.SEPARATOR_CHARS)); + strTokenizer.setQuoteMatcher(getStrMatcher(PropertyKey.QUOTE_CHARS)); + strTokenizer.setTrimmerMatcher(getStrMatcher(PropertyKey.TRIMMER_CHARS)); + strTokenizer.setIgnoredMatcher(getStrMatcher(PropertyKey.IGNORED_CHARS)); strTokenizer.setEmptyTokenAsNull(false); strTokenizer.setIgnoreEmptyTokens(false); this.tokenizer = strTokenizer; } - + public final String[] tokenize(int lineNumber, String line) { return tokenizer.reset(line).getTokenArray(); } - + public final void destroy() { tokenizer = null; diff --git a/common/sourceTest/java/ch/systemsx/cisd/common/parser/DefaultLineTokenizerTest.java b/common/sourceTest/java/ch/systemsx/cisd/common/parser/DefaultLineTokenizerTest.java new file mode 100644 index 0000000000000000000000000000000000000000..a1ab65ce48c631518b7a709bc2bf726ab7f2cc2f --- /dev/null +++ b/common/sourceTest/java/ch/systemsx/cisd/common/parser/DefaultLineTokenizerTest.java @@ -0,0 +1,82 @@ +/* + * Copyright 2007 ETH Zuerich, CISD + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package ch.systemsx.cisd.common.parser; + +import static org.testng.Assert.*; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + +/** + * Test cases for corresponding {@link DefaultLineTokenizer} class. + * + * @author Christian Ribeaud + */ +public final class DefaultLineTokenizerTest +{ + private DefaultLineTokenizer tokenizer; + + @BeforeClass + public final void initTokenizer() + { + tokenizer = new DefaultLineTokenizer(); + tokenizer.init(); + } + + @Test + public final void testTokenize() + { + // Default separator is '\t' + assertNotNull(tokenizer); + String line = "This\tis\ta\tline"; + String[] tokens = tokenizer.tokenize(0, line); + assertTrue(tokens.length == 4); + assertEquals(tokens[0], "This"); + assertEquals(tokens[1], "is"); + // Trim is whitespace + line = " This\t is \t a \tline "; + tokens = tokenizer.tokenize(0, line); + assertTrue(tokens.length == 4); + assertEquals(tokens[2], "a"); + assertEquals(tokens[3], "line"); + // Separators are " \t" + tokenizer.setProperty(DefaultLineTokenizer.PropertyKey.SEPARATOR_CHARS, " \t"); + tokenizer.init(); + line = "This is \ta\tline"; + tokens = tokenizer.tokenize(0, line); + assertTrue(tokens.length == 5); + assertEquals(tokens[0], "This"); + assertEquals(tokens[1], "is"); + assertEquals(tokens[2], ""); + // Trying quote characters + tokenizer.setProperty(DefaultLineTokenizer.PropertyKey.QUOTE_CHARS, "'"); + line = "'This rule'\t'is not'\ta\tline"; + tokens = tokenizer.tokenize(0, line); + assertTrue(tokens.length == 4); + assertEquals(tokens[0], "This rule"); + assertEquals(tokens[1], "is not"); + // Trying to set <code>null</code> + tokenizer.setProperty(null, null); + // Resetting to default values + tokenizer.setProperty(DefaultLineTokenizer.PropertyKey.QUOTE_CHARS, null); + tokenizer.setProperty(DefaultLineTokenizer.PropertyKey.SEPARATOR_CHARS, null); + line = " This\t is \t a \tline "; + tokens = tokenizer.tokenize(0, line); + assertTrue(tokens.length == 4); + assertEquals(tokens[0], "This"); + assertEquals(tokens[1], "is"); + } +} \ No newline at end of file