Skip to content
Snippets Groups Projects
Commit 18545c0b authored by ribeaudc's avatar ribeaudc
Browse files

Unit test for DefaultLineTokenizer.

SVN: 198
parent f6735576
No related branches found
No related tags found
No related merge requests found
...@@ -18,9 +18,10 @@ import org.apache.commons.lang.text.StrTokenizer; ...@@ -18,9 +18,10 @@ import org.apache.commons.lang.text.StrTokenizer;
*/ */
public class DefaultLineTokenizer implements ILineTokenizer public class DefaultLineTokenizer implements ILineTokenizer
{ {
/** Allowed <code>Properties</code> keys. */ /** Allowed <code>Properties</code> keys. */
public static enum PropertyKey { public static enum PropertyKey
{
/** The property key for setting some delimiter characters. */ /** The property key for setting some delimiter characters. */
SEPARATOR_CHARS, SEPARATOR_CHARS,
/** The property key for setting some quote characters. */ /** The property key for setting some quote characters. */
...@@ -30,59 +31,108 @@ public class DefaultLineTokenizer implements ILineTokenizer ...@@ -30,59 +31,108 @@ public class DefaultLineTokenizer implements ILineTokenizer
/** The property key for setting some trimmer characters. */ /** The property key for setting some trimmer characters. */
TRIMMER_CHARS, TRIMMER_CHARS,
} }
/** Some properties for this tab parser. */ /** Some properties for this tab parser. */
private final Map<PropertyKey, String> properties; private final Map<PropertyKey, String> properties;
/** The default <code>StrMatcher</code> for each <code>PropertyKey</code>. */
private final static Map<PropertyKey, StrMatcher> defaultStrMatchers = createDefaultStrMatchers();
/**
* Original value is <code>null</code>.
* <p>
* If not <code>null</code> then we assume that {@link #init()} method has been called.
* </p>
*/
private StrTokenizer tokenizer; private StrTokenizer tokenizer;
/** Creates a tokenizer that has no property explicitly set yet. */
public DefaultLineTokenizer()
{
    properties = new EnumMap<PropertyKey, String>(PropertyKey.class);
}
/**
 * Builds the table of fallback matchers, one entry per {@link PropertyKey}, that is consulted when no value
 * has been set for a key.
 */
private static final Map<PropertyKey, StrMatcher> createDefaultStrMatchers()
{
    final Map<PropertyKey, StrMatcher> defaults = new EnumMap<PropertyKey, StrMatcher>(PropertyKey.class);
    // By default no character acts as a quote and no character is ignored.
    defaults.put(PropertyKey.QUOTE_CHARS, StrMatcher.noneMatcher());
    defaults.put(PropertyKey.IGNORED_CHARS, StrMatcher.noneMatcher());
    // By default tokens are delimited by tabs and trimmed with the standard trim matcher.
    defaults.put(PropertyKey.SEPARATOR_CHARS, StrMatcher.tabMatcher());
    defaults.put(PropertyKey.TRIMMER_CHARS, StrMatcher.trimMatcher());
    return defaults;
}
/** /**
* Sets a property for this <code>TabReaderParser</code>. * Sets a property for this <code>TabReaderParser</code>.
* * <p>
* @throws IllegalArgumentException if given <code>key</code> could not found in {@link PropertyKey}. * Does nothing if given <code>key</code> is <code>null</code> and resets <code>key</code> to default value if
* given <code>value</code> is <code>null</code>.
* </p>
*/ */
public final void setProperty(PropertyKey key, String value) public final void setProperty(PropertyKey key, String value)
{ {
if (key == null)
{
return;
}
if (value == null)
{
properties.remove(key);
}
properties.put(key, value); properties.put(key, value);
if (tokenizer != null)
{
StrMatcher matcher = getStrMatcher(key);
if (key == PropertyKey.SEPARATOR_CHARS)
{
tokenizer.setDelimiterMatcher(matcher);
} else if (key == PropertyKey.QUOTE_CHARS)
{
tokenizer.setQuoteMatcher(matcher);
} else if (key == PropertyKey.TRIMMER_CHARS)
{
tokenizer.setTrimmerMatcher(matcher);
} else if (key == PropertyKey.IGNORED_CHARS)
{
tokenizer.setIgnoredMatcher(matcher);
}
}
} }
/** Converts a defined <code>PropertyKey</code> into <code>StrMatcher</code>. */ /** Converts a defined <code>PropertyKey</code> into <code>StrMatcher</code>. */
private final StrMatcher getStrMatcher(PropertyKey key, StrMatcher defaultMatcher) { private final StrMatcher getStrMatcher(PropertyKey key)
StrMatcher strMatcher = defaultMatcher; {
StrMatcher strMatcher = defaultStrMatchers.get(key);
String value = properties.get(key); String value = properties.get(key);
if (value != null) if (value != null)
{ {
// Note that we use a set of characters (like <code>StringTokenizer</code>) does
// and not <code>StrMatcher.stringMatcher(value)</code>
strMatcher = StrMatcher.charSetMatcher(value); strMatcher = StrMatcher.charSetMatcher(value);
} }
return strMatcher; return strMatcher;
} }
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// ILineTokenizer // ILineTokenizer
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
public final void init() public final void init()
{ {
StrTokenizer strTokenizer = new StrTokenizer(); StrTokenizer strTokenizer = new StrTokenizer();
strTokenizer.setDelimiterMatcher(getStrMatcher(PropertyKey.SEPARATOR_CHARS, StrMatcher.tabMatcher())); strTokenizer.setDelimiterMatcher(getStrMatcher(PropertyKey.SEPARATOR_CHARS));
strTokenizer.setQuoteMatcher(getStrMatcher(PropertyKey.QUOTE_CHARS, StrMatcher.noneMatcher())); strTokenizer.setQuoteMatcher(getStrMatcher(PropertyKey.QUOTE_CHARS));
strTokenizer.setTrimmerMatcher(getStrMatcher(PropertyKey.TRIMMER_CHARS, StrMatcher.trimMatcher())); strTokenizer.setTrimmerMatcher(getStrMatcher(PropertyKey.TRIMMER_CHARS));
strTokenizer.setIgnoredMatcher(getStrMatcher(PropertyKey.IGNORED_CHARS, StrMatcher.noneMatcher())); strTokenizer.setIgnoredMatcher(getStrMatcher(PropertyKey.IGNORED_CHARS));
strTokenizer.setEmptyTokenAsNull(false); strTokenizer.setEmptyTokenAsNull(false);
strTokenizer.setIgnoreEmptyTokens(false); strTokenizer.setIgnoreEmptyTokens(false);
this.tokenizer = strTokenizer; this.tokenizer = strTokenizer;
} }
public final String[] tokenize(int lineNumber, String line) public final String[] tokenize(int lineNumber, String line)
{ {
return tokenizer.reset(line).getTokenArray(); return tokenizer.reset(line).getTokenArray();
} }
public final void destroy() public final void destroy()
{ {
tokenizer = null; tokenizer = null;
......
/*
* Copyright 2007 ETH Zuerich, CISD
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ch.systemsx.cisd.common.parser;
import static org.testng.Assert.*;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
/**
* Test cases for corresponding {@link DefaultLineTokenizer} class.
*
* @author Christian Ribeaud
*/
public final class DefaultLineTokenizerTest
{
    /** The tokenizer under test, created once for the whole class. */
    private DefaultLineTokenizer tokenizer;

    /** Creates and initializes the tokenizer with its default settings. */
    @BeforeClass
    public final void initTokenizer()
    {
        tokenizer = new DefaultLineTokenizer();
        tokenizer.init();
    }

    /**
     * Tokenizes given <code>line</code> and checks the token count and every single token against
     * <code>expected</code>, so that a failure reports the actual value instead of a bare <code>false</code>.
     */
    private void assertTokens(String line, String... expected)
    {
        final String[] tokens = tokenizer.tokenize(0, line);
        assertEquals(tokens.length, expected.length, "Unexpected number of tokens for line '" + line + "'");
        for (int i = 0; i < expected.length; i++)
        {
            assertEquals(tokens[i], expected[i], "Unexpected token at index " + i + " for line '" + line + "'");
        }
    }

    /**
     * Walks through default tokenizing, custom separators, quote characters, the <code>null</code> key and
     * resetting properties to their defaults. The steps share one tokenizer and depend on their order.
     */
    @Test
    public final void testTokenize()
    {
        assertNotNull(tokenizer);
        // Default separator is '\t'
        assertTokens("This\tis\ta\tline", "This", "is", "a", "line");
        // Default trimmer strips the whitespace around each token
        assertTokens(" This\t is \t a \tline ", "This", "is", "a", "line");
        // Separators are " \t": a space directly followed by a tab encloses an empty token
        tokenizer.setProperty(DefaultLineTokenizer.PropertyKey.SEPARATOR_CHARS, " \t");
        tokenizer.init();
        assertTokens("This is \ta\tline", "This", "is", "", "a", "line");
        // Quote characters protect the separators they enclose (set on the already initialized tokenizer)
        tokenizer.setProperty(DefaultLineTokenizer.PropertyKey.QUOTE_CHARS, "'");
        assertTokens("'This rule'\t'is not'\ta\tline", "This rule", "is not", "a", "line");
        // A <code>null</code> key must be silently ignored
        tokenizer.setProperty(null, null);
        // A <code>null</code> value resets the key to its default
        tokenizer.setProperty(DefaultLineTokenizer.PropertyKey.QUOTE_CHARS, null);
        tokenizer.setProperty(DefaultLineTokenizer.PropertyKey.SEPARATOR_CHARS, null);
        assertTokens(" This\t is \t a \tline ", "This", "is", "a", "line");
    }
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment