From a53c0aedcd3a46275c026f8ac16f1011aefa32ce Mon Sep 17 00:00:00 2001
From: ribeaudc <ribeaudc>
Date: Mon, 21 May 2007 11:40:13 +0000
Subject: [PATCH] Adding first version of parser.

SVN: 116
---
 .../common/parser/DefaultLineTokenizer.java | 90 ++++++++++
 .../common/parser/DefaultPropertyMapper.java | 55 ++++++
 .../common/parser/DefaultReaderParser.java | 160 ++++++++++++++++++
 .../parser/HeaderFilePropertyMapper.java | 66 ++++++++
 .../cisd/common/parser/ILineFilter.java | 47 +++++
 .../cisd/common/parser/ILineTokenizer.java | 44 +++++
 .../common/parser/IParserObjectFactory.java | 61 +++++++
 .../cisd/common/parser/IPropertyMapper.java | 66 ++++++++
 .../common/parser/IPropertyMapperFactory.java | 29 ++++
 .../cisd/common/parser/IReaderParser.java | 67 ++++++++
 10 files changed, 685 insertions(+)
 create mode 100644 common/source/java/ch/systemsx/cisd/common/parser/DefaultLineTokenizer.java
 create mode 100644 common/source/java/ch/systemsx/cisd/common/parser/DefaultPropertyMapper.java
 create mode 100644 common/source/java/ch/systemsx/cisd/common/parser/DefaultReaderParser.java
 create mode 100644 common/source/java/ch/systemsx/cisd/common/parser/HeaderFilePropertyMapper.java
 create mode 100644 common/source/java/ch/systemsx/cisd/common/parser/ILineFilter.java
 create mode 100644 common/source/java/ch/systemsx/cisd/common/parser/ILineTokenizer.java
 create mode 100644 common/source/java/ch/systemsx/cisd/common/parser/IParserObjectFactory.java
 create mode 100644 common/source/java/ch/systemsx/cisd/common/parser/IPropertyMapper.java
 create mode 100644 common/source/java/ch/systemsx/cisd/common/parser/IPropertyMapperFactory.java
 create mode 100644 common/source/java/ch/systemsx/cisd/common/parser/IReaderParser.java

diff --git a/common/source/java/ch/systemsx/cisd/common/parser/DefaultLineTokenizer.java b/common/source/java/ch/systemsx/cisd/common/parser/DefaultLineTokenizer.java
new file mode 100644
index 00000000000..d1fb1446a00
--- /dev/null
+++ b/common/source/java/ch/systemsx/cisd/common/parser/DefaultLineTokenizer.java
@@ -0,0 +1,90 @@
+package ch.systemsx.cisd.common.parser;
+
+import java.util.EnumMap;
+import java.util.Map;
+
+import org.apache.commons.lang.text.StrMatcher;
+import org.apache.commons.lang.text.StrTokenizer;
+
+/**
+ * A default <code>ILineTokenizer</code> implementation that splits a line into an array of <code>String</code>
+ * tokens.
+ * <p>
+ * By default tokens are separated by <code>\t</code>, no quote or ignored characters are defined and tokens are
+ * trimmed. Use {@link #setProperty(PropertyKey, String)} to override any of these. The properties are only read
+ * when {@link #init()} builds the underlying tokenizer, so they must be set before {@link #init()} is called.
+ * </p>
+ * <p>
+ * Empty tokens are kept as empty strings; they are neither dropped nor converted to <code>null</code>.
+ * </p>
+ *
+ * @author Christian Ribeaud
+ */
+public class DefaultLineTokenizer implements ILineTokenizer
+{
+
+    /** Keys of the properties that can be customized through {@link #setProperty(PropertyKey, String)}. */
+    public static enum PropertyKey {
+        /** The property key for setting some delimiter characters. */
+        SEPARATOR_CHARS,
+        /** The property key for setting some quote characters. */
+        QUOTE_CHARS,
+        /** The property key for setting some ignored characters. */
+        IGNORED_CHARS,
+        /** The property key for setting some trimmer characters. */
+        TRIMMER_CHARS,
+    }
+
+    /** The customized properties of this tokenizer. A missing (or <code>null</code>) value means "use the default". */
+    private final Map<PropertyKey, String> properties;
+
+    /** The tokenizer built by {@link #init()}; <code>null</code> before {@link #init()} and after {@link #destroy()}. */
+    private StrTokenizer tokenizer;
+
+    public DefaultLineTokenizer()
+    {
+        this.properties = new EnumMap<PropertyKey, String>(PropertyKey.class);
+    }
+
+    /**
+     * Sets a property for this <code>DefaultLineTokenizer</code>.
+     * <p>
+     * Each character of <code>value</code> is used as one of the matching characters for the given <code>key</code>.
+     * The new value only becomes effective with the next call to {@link #init()}.
+     * </p>
+     *
+     * @param key the property to set. Must not be <code>null</code>.
+     * @param value the characters to use, or <code>null</code> to fall back to the default.
+     */
+    public final void setProperty(PropertyKey key, String value)
+    {
+        properties.put(key, value);
+    }
+
+    /**
+     * Converts the value stored for given <code>key</code> into a <code>StrMatcher</code>.
+     *
+     * @return a matcher for the characters stored under <code>key</code>, or <code>defaultMatcher</code> if no value
+     *         has been set.
+     */
+    private StrMatcher getStrMatcher(PropertyKey key, StrMatcher defaultMatcher)
+    {
+        final String value = properties.get(key);
+        return value == null ? defaultMatcher : StrMatcher.charSetMatcher(value);
+    }
+
+    ///////////////////////////////////////////////////////
+    // ILineTokenizer
+    ///////////////////////////////////////////////////////
+
+    /** Builds the underlying tokenizer from the properties set so far. */
+    public final void init()
+    {
+        StrTokenizer strTokenizer = new StrTokenizer();
+        strTokenizer.setDelimiterMatcher(getStrMatcher(PropertyKey.SEPARATOR_CHARS, StrMatcher.tabMatcher()));
+        strTokenizer.setQuoteMatcher(getStrMatcher(PropertyKey.QUOTE_CHARS, StrMatcher.noneMatcher()));
+        strTokenizer.setTrimmerMatcher(getStrMatcher(PropertyKey.TRIMMER_CHARS, StrMatcher.trimMatcher()));
+        strTokenizer.setIgnoredMatcher(getStrMatcher(PropertyKey.IGNORED_CHARS, StrMatcher.noneMatcher()));
+        strTokenizer.setEmptyTokenAsNull(false);
+        strTokenizer.setIgnoreEmptyTokens(false);
+        this.tokenizer = strTokenizer;
+    }
+
+    /**
+     * Splits given <code>line</code> into its tokens.
+     *
+     * @param lineNumber not used by this implementation.
+     * @throws IllegalStateException if {@link #init()} has not been called, or {@link #destroy()} has already been
+     *             called.
+     */
+    public final String[] tokenize(int lineNumber, String line)
+    {
+        if (tokenizer == null)
+        {
+            // Fail with an explicit message instead of an anonymous NullPointerException.
+            throw new IllegalStateException("Tokenizer is not initialized: call init() before tokenize().");
+        }
+        return tokenizer.reset(line).getTokenArray();
+    }
+
+    /** Releases the underlying tokenizer. {@link #init()} must be called again before the next parsing. */
+    public final void destroy()
+    {
+        tokenizer = null;
+    }
+}
\ No newline at end of file
diff --git a/common/source/java/ch/systemsx/cisd/common/parser/DefaultPropertyMapper.java b/common/source/java/ch/systemsx/cisd/common/parser/DefaultPropertyMapper.java
new file mode 100644
index 00000000000..1203bf2e668
--- /dev/null
+++ b/common/source/java/ch/systemsx/cisd/common/parser/DefaultPropertyMapper.java
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2007 ETH Zuerich, CISD
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package ch.systemsx.cisd.common.parser;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * An <code>IPropertyMapper</code> backed by a fixed list of <code>Property</code> objects, looked up by their
+ * <code>name</code>.
+ *
+ * @author Christian Ribeaud
+ */
+public class DefaultPropertyMapper implements IPropertyMapper
+{
+    /** The known properties, keyed by <code>Property.name</code>. */
+    private final Map<String, Property> properties;
+
+    /**
+     * Indexes given <code>properties</code> by name. If two properties share a name, the later one in the list wins.
+     *
+     * @param properties the properties to expose. Must not be <code>null</code>.
+     */
+    DefaultPropertyMapper(List<Property> properties)
+    {
+        final Map<String, Property> byName = new HashMap<String, Property>(properties.size());
+        for (final Property each : properties)
+        {
+            byName.put(each.name, each);
+        }
+        this.properties = byName;
+    }
+
+    ///////////////////////////////////////////////////////
+    // IPropertyMapper
+    ///////////////////////////////////////////////////////
+
+    /** Returns the property registered under given <code>name</code>, or <code>null</code> if there is none. */
+    public Property getProperty(String name)
+    {
+        return properties.get(name);
+    }
+
+}
diff --git a/common/source/java/ch/systemsx/cisd/common/parser/DefaultReaderParser.java b/common/source/java/ch/systemsx/cisd/common/parser/DefaultReaderParser.java
new file mode 100644
index 00000000000..421be80ab5f
--- /dev/null
+++ b/common/source/java/ch/systemsx/cisd/common/parser/DefaultReaderParser.java
@@ -0,0 +1,160 @@
+/*
+ * Copyright 2007 ETH Zuerich, CISD
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package ch.systemsx.cisd.common.parser;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.Reader;
+import java.util.ArrayList;
+import java.util.List;
+
+import ch.systemsx.cisd.common.exceptions.CheckedExceptionTunnel;
+
+/**
+ * A default <code>IReaderParser</code> implementation.
+ * <p>
+ * The object type returned by this implementation is generic. This implementation defines a <code>ILineFilter</code>
+ * that filters out comment and empty lines.
+ * </p>
+ * <p>
+ * If an <code>IPropertyMapperFactory</code> has been set and reports a non-negative header line, the line with that
+ * index is tokenized and handed to the factory to build the <code>IPropertyMapper</code> of the object factory; it
+ * never produces an element. Without an <code>IPropertyMapperFactory</code> the input is parsed as if it had no
+ * header line.
+ * </p>
+ *
+ * @author Christian Ribeaud
+ */
+public class DefaultReaderParser<E> implements IReaderParser<E>
+{
+    /**
+     * A <code>LineFilter</code> implementation that filters out comment and empty lines (lines starting with '#').
+     */
+    private final static ILineFilter COMMENT_AND_EMPTY_LINE_FILTER = new ILineFilter()
+        {
+            ///////////////////////////////////////////////////////
+            // LineFilter
+            ///////////////////////////////////////////////////////
+
+            public boolean acceptLine(String line)
+            {
+                String trimmed = line.trim();
+                return trimmed.length() > 0 && trimmed.startsWith("#") == false;
+            }
+        };
+
+    private final ILineTokenizer lineTokenizer;
+
+    private IParserObjectFactory<E> factory;
+
+    private IPropertyMapperFactory mapperFactory;
+
+    /** Creates a parser that uses a {@link DefaultLineTokenizer}. */
+    public DefaultReaderParser()
+    {
+        this(new DefaultLineTokenizer());
+    }
+
+    /** Creates a parser that uses given <code>lineTokenizer</code> to split the lines. */
+    public DefaultReaderParser(ILineTokenizer lineTokenizer)
+    {
+        this.lineTokenizer = lineTokenizer;
+    }
+
+    /** Turns given <code>tokens</code> into an element by delegating to the registered object factory. */
+    protected E createObject(String[] tokens)
+    {
+        return factory.createObject(tokens);
+    }
+
+    /**
+     * Parses given <code>line</code> into its tokens.
+     * <p>
+     * Uses <code>ILineTokenizer</code> to do its job.
+     * </p>
+     */
+    protected String[] parseLine(int lineNumber, String line)
+    {
+        return lineTokenizer.tokenize(lineNumber, line);
+    }
+
+    /** Inits the parsing: prepares the <code>ILineTokenizer</code>. */
+    protected void initParsing()
+    {
+        lineTokenizer.init();
+    }
+
+    ///////////////////////////////////////////////////////
+    // Parser
+    ///////////////////////////////////////////////////////
+
+    /** Parses given <code>reader</code>, skipping empty lines and lines starting with '#'. */
+    public final List<E> parse(Reader reader)
+    {
+        return parse(reader, COMMENT_AND_EMPTY_LINE_FILTER);
+    }
+
+    /**
+     * Parses given <code>reader</code> line by line and returns one element per line accepted by
+     * <code>lineFilter</code>. The header line, if any, is consumed without consulting the filter.
+     * <p>
+     * The given <code>reader</code> is not closed. The line tokenizer is initialized before and destroyed after the
+     * parsing, and is locked for the whole duration of the call.
+     * </p>
+     *
+     * @throws CheckedExceptionTunnel if an <code>IOException</code> occurs while reading.
+     */
+    public final List<E> parse(Reader reader, ILineFilter lineFilter)
+    {
+        final BufferedReader bufferedReader =
+                reader instanceof BufferedReader ? (BufferedReader) reader : new BufferedReader(reader);
+        final List<E> elements = new ArrayList<E>();
+        synchronized (lineTokenizer)
+        {
+            initParsing();
+            try
+            {
+                // A missing mapper factory or a negative value means that there is no header line.
+                // NOTE(review): the header index is compared with the zero-based index of the raw line
+                // (comment and empty lines included) - confirm this is the intended numbering.
+                final int headerLine = mapperFactory == null ? -1 : mapperFactory.getHeaderLine();
+                String line;
+                for (int lineNumber = 0; (line = bufferedReader.readLine()) != null; lineNumber++)
+                {
+                    // Only the header line itself configures the property mapper. Testing
+                    // 'headerLine > -1' here would swallow every line as soon as a header is configured.
+                    if (lineNumber == headerLine)
+                    {
+                        String[] tokens = parseLine(lineNumber, line);
+                        factory.setPropertyMapper(mapperFactory.createPropertyMapper(tokens));
+                        continue;
+                    }
+                    if (lineFilter.acceptLine(line))
+                    {
+                        String[] tokens = parseLine(lineNumber, line);
+                        elements.add(createObject(tokens));
+                    }
+                }
+                return elements;
+            } catch (IOException ex)
+            {
+                throw new CheckedExceptionTunnel(ex);
+            } finally
+            {
+                // Destroys line tokenizer.
+                lineTokenizer.destroy();
+            }
+        }
+    }
+
+    /** Sets the factory that turns the tokens of a line into an element. */
+    public final void setObjectFactory(IParserObjectFactory<E> factory)
+    {
+        this.factory = factory;
+    }
+
+    /** Sets the factory that knows where the header line is and how to build a property mapper out of it. */
+    public final void setPropertyMapperFactory(IPropertyMapperFactory mapperFactory)
+    {
+        this.mapperFactory = mapperFactory;
+    }
+}
\ No newline at end of file
diff --git a/common/source/java/ch/systemsx/cisd/common/parser/HeaderFilePropertyMapper.java b/common/source/java/ch/systemsx/cisd/common/parser/HeaderFilePropertyMapper.java
new file mode 100644
index 00000000000..579c5d6adb4
--- /dev/null
+++ b/common/source/java/ch/systemsx/cisd/common/parser/HeaderFilePropertyMapper.java
@@ -0,0 +1,66 @@
+/*
+ * Copyright 2007 ETH Zuerich, CISD
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package ch.systemsx.cisd.common.parser;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.commons.lang.StringUtils;
+
+/**
+ * An <code>IPropertyMapper</code> built from the tokens of a header line.
+ * <p>
+ * Each header token has the form <code>name</code> or <code>name,format</code> (see
+ * {@link #NAME_FORMAT_SEPARATOR}). The position of the token in the header becomes the column of the property.
+ * </p>
+ *
+ * @author Christian Ribeaud
+ */
+public class HeaderFilePropertyMapper implements IPropertyMapper
+{
+    /** Separates the property name from its optional format inside a header token. */
+    public final static char NAME_FORMAT_SEPARATOR = ',';
+
+    /** The properties found in the header, keyed by property name. */
+    private final Map<String, Property> properties;
+
+    /**
+     * @param headerTokens the tokens of the header line. Must not be <code>null</code>.
+     */
+    HeaderFilePropertyMapper(String[] headerTokens)
+    {
+        this.properties = tokensToMap(headerTokens);
+    }
+
+    /**
+     * Converts given header <code>tokens</code> into a map of properties keyed by property name.
+     * <p>
+     * Tokens that contain no name (<code>null</code>, empty or made of separators only) are skipped: such a column
+     * can not be looked up by name anyway. The column index of the remaining properties is not affected.
+     * </p>
+     */
+    private static Map<String, Property> tokensToMap(String[] tokens)
+    {
+        Map<String, Property> map = new HashMap<String, Property>(tokens.length);
+        for (int i = 0; i < tokens.length; i++)
+        {
+            // StringUtils.split returns null for a null token and an empty array for an empty one.
+            String[] split = StringUtils.split(tokens[i], NAME_FORMAT_SEPARATOR);
+            if (split == null || split.length == 0)
+            {
+                continue;
+            }
+            String name = split[0];
+            String format = split.length > 1 ? split[1] : null;
+            // Key by the bare name (not by the raw 'name,format' token) so that getProperty(name) finds it.
+            map.put(name, new Property(i, name, format));
+        }
+        return map;
+    }
+
+    ///////////////////////////////////////////////////////
+    // IPropertyMapper
+    ///////////////////////////////////////////////////////
+
+    /** Returns the property with given <code>name</code>, or <code>null</code> if the header does not define it. */
+    public Property getProperty(String name)
+    {
+        return properties.get(name);
+    }
+
+}
diff --git a/common/source/java/ch/systemsx/cisd/common/parser/ILineFilter.java b/common/source/java/ch/systemsx/cisd/common/parser/ILineFilter.java
new file mode 100644
index 00000000000..3b5f06b7aff
--- /dev/null
+++ b/common/source/java/ch/systemsx/cisd/common/parser/ILineFilter.java
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2007 ETH Zuerich, CISD
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package ch.systemsx.cisd.common.parser;
+
+/**
+ * Decides which lines of the input an <code>IReaderParser</code> turns into elements.
+ *
+ * @author Christian Ribeaud
+ */
+public interface ILineFilter
+{
+
+    /** A filter that lets every line through. */
+    ILineFilter ALWAYS_ACCEPT_LINE = new ILineFilter()
+        {
+
+            //
+            // ILineFilter
+            //
+
+            public boolean acceptLine(String line)
+            {
+                return true;
+            }
+        };
+
+    /**
+     * Tells whether given <code>line</code> should be parsed or skipped.
+     *
+     * @param line the line read from the <code>Reader</code>. Can not be <code>null</code>.
+     * @return <code>true</code> if the line should be accepted.
+     */
+    boolean acceptLine(String line);
+}
diff --git a/common/source/java/ch/systemsx/cisd/common/parser/ILineTokenizer.java b/common/source/java/ch/systemsx/cisd/common/parser/ILineTokenizer.java
new file mode 100644
index 00000000000..4d468526288
--- /dev/null
+++ b/common/source/java/ch/systemsx/cisd/common/parser/ILineTokenizer.java
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2007 ETH Zuerich, CISD
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package ch.systemsx.cisd.common.parser;
+
+/**
+ * Splits a text line into its tokens.
+ * <p>
+ * The life cycle is {@link #init()}, any number of calls to {@link #tokenize(int, String)}, then
+ * {@link #destroy()}.
+ * </p>
+ *
+ * @author Christian Ribeaud
+ */
+public interface ILineTokenizer
+{
+
+    /**
+     * Prepares this <code>ILineTokenizer</code> for use.
+     * <p>
+     * Gets called once, right before parsing starts.
+     * </p>
+     */
+    void init();
+
+    /**
+     * Splits given <code>line</code> into tokens.
+     *
+     * @param lineNumber the number of the line being tokenized.
+     * @param line the text of the line.
+     * @return the tokens found in <code>line</code>.
+     */
+    String[] tokenize(int lineNumber, String line);
+
+    /**
+     * Releases the resources used by this <code>ILineTokenizer</code>.
+     * <p>
+     * Gets called once, when parsing has finished.
+     * </p>
+     */
+    void destroy();
+}
\ No newline at end of file
diff --git a/common/source/java/ch/systemsx/cisd/common/parser/IParserObjectFactory.java b/common/source/java/ch/systemsx/cisd/common/parser/IParserObjectFactory.java
new file mode 100644
index 00000000000..0183eeed7ae
--- /dev/null
+++ b/common/source/java/ch/systemsx/cisd/common/parser/IParserObjectFactory.java
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2007 ETH Zuerich, CISD
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package ch.systemsx.cisd.common.parser;
+
+/**
+ * Implementations know how to convert the tokens of a parsed text line into an appropriate <code>Object</code>.
+ * An <code>IParserObjectFactory</code> is given an <code>IPropertyMapper</code> that helps it map
+ * <code>Object</code> properties to the tokens.
+ * <p>
+ * An <code>IParserObjectFactory</code> is typically registered in {@link IReaderParser}.
+ * </p>
+ *
+ * @author Christian Ribeaud
+ */
+public interface IParserObjectFactory<E>
+{
+
+    /**
+     * An <code>IParserObjectFactory</code> that performs no conversion: it returns the passed
+     * <code>lineTokens</code> unchanged and ignores the property mapper.
+     */
+    IParserObjectFactory<String[]> DO_NOTHING_OBJECT_FACTORY = new IParserObjectFactory<String[]>()
+        {
+
+            //
+            // IParserObjectFactory
+            //
+
+            public String[] createObject(String[] lineTokens)
+            {
+                return lineTokens;
+            }
+
+            public void setPropertyMapper(IPropertyMapper propertyMapper)
+            {
+                // Nothing to map: the tokens are returned as they are.
+            }
+        };
+
+    /**
+     * Converts the tokens of one text line into an appropriate <i>Object</i>.
+     */
+    E createObject(String[] lineTokens);
+
+    /** Sets the mapper this factory uses to relate <code>Object</code> properties to tokens. */
+    void setPropertyMapper(IPropertyMapper propertyMapper);
+}
\ No newline at end of file
diff --git a/common/source/java/ch/systemsx/cisd/common/parser/IPropertyMapper.java b/common/source/java/ch/systemsx/cisd/common/parser/IPropertyMapper.java
new file mode 100644
index 00000000000..838dda1e09f
--- /dev/null
+++ b/common/source/java/ch/systemsx/cisd/common/parser/IPropertyMapper.java
@@ -0,0 +1,66 @@
+/*
+ * Copyright 2007 ETH Zuerich, CISD
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package ch.systemsx.cisd.common.parser;
+
+import org.apache.commons.lang.builder.ToStringBuilder;
+
+/**
+ * Gives access to {@link Property} descriptions by property name.
+ *
+ * @author Christian Ribeaud
+ */
+public interface IPropertyMapper
+{
+
+    /** Returns the property registered under given <code>name</code>. */
+    Property getProperty(String name);
+
+    //
+    // Helper Classes
+    //
+
+    /**
+     * An immutable description of a property: its column, its name and an optional format.
+     *
+     * @author Christian Ribeaud
+     */
+    final class Property
+    {
+
+        /** The column of this property (<code>HeaderFilePropertyMapper</code> passes the header token index). */
+        public final int column;
+
+        /** The name of this property. */
+        public final String name;
+
+        /** The format of this property (<code>HeaderFilePropertyMapper</code> passes <code>null</code> if none). */
+        public final String format;
+
+        protected Property(final int column, final String name, final String format)
+        {
+            this.format = format;
+            this.name = name;
+            this.column = column;
+        }
+
+        //
+        // Object
+        //
+
+        /** Returns a reflection-based dump of all the fields. */
+        @Override
+        public String toString()
+        {
+            return ToStringBuilder.reflectionToString(this);
+        }
+
+    }
+}
\ No newline at end of file
diff --git a/common/source/java/ch/systemsx/cisd/common/parser/IPropertyMapperFactory.java b/common/source/java/ch/systemsx/cisd/common/parser/IPropertyMapperFactory.java
new file mode 100644
index 00000000000..d2c3c54de32
--- /dev/null
+++ b/common/source/java/ch/systemsx/cisd/common/parser/IPropertyMapperFactory.java
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2007 ETH Zuerich, CISD
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package ch.systemsx.cisd.common.parser;
+
+/**
+ * Creates an <code>IPropertyMapper</code> out of the tokens of a header line and tells where that line is.
+ *
+ * @author Christian Ribeaud
+ */
+public interface IPropertyMapperFactory
+{
+    /**
+     * Returns the number of the header line. A negative value is treated by <code>DefaultReaderParser</code> as
+     * "there is no header line".
+     */
+    int getHeaderLine();
+
+    /** Creates a property mapper out of given <code>tokens</code> (the tokens of the header line). */
+    IPropertyMapper createPropertyMapper(String[] tokens);
+}
diff --git a/common/source/java/ch/systemsx/cisd/common/parser/IReaderParser.java b/common/source/java/ch/systemsx/cisd/common/parser/IReaderParser.java
new file mode 100644
index 00000000000..7a110b7d79c
--- /dev/null
+++ b/common/source/java/ch/systemsx/cisd/common/parser/IReaderParser.java
@@ -0,0 +1,67 @@
+/*
+ * Copyright 2007 ETH Zuerich, CISD
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package ch.systemsx.cisd.common.parser;
+
+import java.io.BufferedReader;
+import java.io.Reader;
+import java.util.List;
+
+import ch.systemsx.cisd.common.exceptions.CheckedExceptionTunnel;
+
+/**
+ * An <code>IReaderParser</code> is able to parse a given {@link Reader} and to return the <code>Object</code>
+ * instances found in it.
+ *
+ * @author Christian Ribeaud
+ */
+public interface IReaderParser<E>
+{
+
+    /**
+     * Parses given <code>Reader</code>, keeping only the lines accepted by given <code>lineFilter</code>. Wraps given
+     * <code>Reader</code> in a {@link BufferedReader} for better performance (if not already done).
+     * <p>
+     * Note that this does not close given <code>Reader</code>. It is your responsibility to do so.
+     * </p>
+     *
+     * @param lineFilter a filter for the lines found in given <code>reader</code>.
+     * @return a <code>List</code> of elements.
+     * @throws CheckedExceptionTunnel if an <code>IOException</code> occurs.
+     */
+    List<E> parse(Reader reader, ILineFilter lineFilter) throws CheckedExceptionTunnel;
+
+    /**
+     * Parses given <code>Reader</code> with the default line filter of the implementation. Wraps given
+     * <code>Reader</code> in a {@link BufferedReader} for better performance (if not already done).
+     * <p>
+     * Note that this does not close given <code>Reader</code>. It is your responsibility to do so.
+     * </p>
+     *
+     * @return a <code>List</code> of elements.
+     * @throws CheckedExceptionTunnel if an <code>IOException</code> occurs.
+     */
+    List<E> parse(Reader reader) throws CheckedExceptionTunnel;
+
+    /**
+     * Sets the <code>IParserObjectFactory</code>.
+     * <p>
+     * Typically, the given <code>factory</code> transforms a line into an element.
+     * </p>
+     */
+    void setObjectFactory(IParserObjectFactory<E> factory);
+
+    /** Sets the factory used to build an <code>IPropertyMapper</code> out of the header line. */
+    void setPropertyMapperFactory(IPropertyMapperFactory factory);
+}
-- 
GitLab