Skip to content
Snippets Groups Projects
Commit a53c0aed authored by ribeaudc's avatar ribeaudc
Browse files

Adding first version of parser.

SVN: 116
parent 699d3e71
No related branches found
No related tags found
No related merge requests found
Showing
with 685 additions and 0 deletions
package ch.systemsx.cisd.common.parser;
import java.util.EnumMap;
import java.util.Map;
import org.apache.commons.lang.text.StrMatcher;
import org.apache.commons.lang.text.StrTokenizer;
/**
 * A default <code>ILineTokenizer</code> implementation that splits a line into an array of <code>String</code>
 * tokens.
 * <p>
 * The default separator char used here is <code>\t</code>. If you want to change that, use
 * {@link #setProperty(PropertyKey, String)} with the corresponding property key defined here. Properties are read
 * when {@link #init()} is called, so they must be set before parsing starts.
 * </p>
 *
 * @author Christian Ribeaud
 */
public class DefaultLineTokenizer implements ILineTokenizer
{
    /** Allowed property keys. */
    public static enum PropertyKey
    {
        /** The property key for setting some delimiter characters. */
        SEPARATOR_CHARS,
        /** The property key for setting some quote characters. */
        QUOTE_CHARS,
        /** The property key for setting some ignored characters. */
        IGNORED_CHARS,
        /** The property key for setting some trimmer characters. */
        TRIMMER_CHARS,
    }

    /** The properties configured for this tokenizer. */
    private final Map<PropertyKey, String> properties;

    /** The working tokenizer; only non-<code>null</code> between {@link #init()} and {@link #destroy()}. */
    private StrTokenizer tokenizer;

    public DefaultLineTokenizer()
    {
        this.properties = new EnumMap<PropertyKey, String>(PropertyKey.class);
    }

    /**
     * Sets a property for this tokenizer.
     * <p>
     * Each character of given <code>value</code> is used as a matching character for given <code>key</code>. A
     * <code>null</code> value makes {@link #init()} fall back to the default for that key.
     * </p>
     */
    public final void setProperty(PropertyKey key, String value)
    {
        properties.put(key, value);
    }

    /**
     * Converts the value stored for given <code>key</code> into a <code>StrMatcher</code>, or returns
     * <code>defaultMatcher</code> if no value has been set.
     */
    private final StrMatcher getStrMatcher(PropertyKey key, StrMatcher defaultMatcher)
    {
        String value = properties.get(key);
        if (value == null)
        {
            return defaultMatcher;
        }
        return StrMatcher.charSetMatcher(value);
    }

    ///////////////////////////////////////////////////////
    // ILineTokenizer
    ///////////////////////////////////////////////////////

    /** Builds the underlying tokenizer from the properties set so far. */
    public final void init()
    {
        StrTokenizer strTokenizer = new StrTokenizer();
        strTokenizer.setDelimiterMatcher(getStrMatcher(PropertyKey.SEPARATOR_CHARS, StrMatcher.tabMatcher()));
        strTokenizer.setQuoteMatcher(getStrMatcher(PropertyKey.QUOTE_CHARS, StrMatcher.noneMatcher()));
        strTokenizer.setTrimmerMatcher(getStrMatcher(PropertyKey.TRIMMER_CHARS, StrMatcher.trimMatcher()));
        strTokenizer.setIgnoredMatcher(getStrMatcher(PropertyKey.IGNORED_CHARS, StrMatcher.noneMatcher()));
        // Keep empty columns as empty strings so that column positions stay stable.
        strTokenizer.setEmptyTokenAsNull(false);
        strTokenizer.setIgnoreEmptyTokens(false);
        this.tokenizer = strTokenizer;
    }

    /**
     * Splits given <code>line</code> into tokens.
     *
     * @param lineNumber the number of the line being tokenized. Only used for error reporting.
     * @throws IllegalStateException if {@link #init()} has not been called or {@link #destroy()} has already been
     *             called.
     */
    public final String[] tokenize(int lineNumber, String line)
    {
        if (tokenizer == null)
        {
            throw new IllegalStateException("Line tokenizer is not initialized: init() must be called before "
                    + "tokenizing line " + lineNumber + ".");
        }
        return tokenizer.reset(line).getTokenArray();
    }

    /** Releases the underlying tokenizer. {@link #init()} must be called again before the next use. */
    public final void destroy()
    {
        tokenizer = null;
    }
}
\ No newline at end of file
/*
* Copyright 2007 ETH Zuerich, CISD
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ch.systemsx.cisd.common.parser;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * An <code>IPropertyMapper</code> implementation backed by a fixed list of <code>Property</code> objects, looked
 * up by their name.
 *
 * @author Christian Ribeaud
 */
public class DefaultPropertyMapper implements IPropertyMapper
{
    /** The registered properties, keyed by property name. */
    private final Map<String, Property> properties;

    /**
     * Indexes given <code>properties</code> by name. If two properties share the same name, the later one in the
     * list wins.
     */
    DefaultPropertyMapper(List<Property> properties)
    {
        final Map<String, Property> byName = new HashMap<String, Property>(properties.size());
        for (final Property each : properties)
        {
            byName.put(each.name, each);
        }
        this.properties = byName;
    }

    ///////////////////////////////////////////////////////
    // IPropertyMapper
    ///////////////////////////////////////////////////////

    /** Returns the property registered under given <code>name</code>, or <code>null</code> if there is none. */
    public Property getProperty(String name)
    {
        return properties.get(name);
    }
}
/*
* Copyright 2007 ETH Zuerich, CISD
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ch.systemsx.cisd.common.parser;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;
import ch.systemsx.cisd.common.exceptions.CheckedExceptionTunnel;
/**
 * A default <code>IReaderParser</code> implementation.
 * <p>
 * The object type returned by this implementation is generic. This implementation defines a <code>ILineFilter</code>
 * that filters out comment and empty lines; it is used by {@link #parse(Reader)}.
 * </p>
 * <p>
 * An <code>IParserObjectFactory</code> must be set with {@link #setObjectFactory(IParserObjectFactory)} before
 * parsing (unless {@link #createObject(String[])} is overridden). Setting an <code>IPropertyMapperFactory</code> is
 * optional: without one, no line is treated as a header.
 * </p>
 *
 * @author Christian Ribeaud
 */
public class DefaultReaderParser<E> implements IReaderParser<E>
{
    /**
     * A <code>ILineFilter</code> implementation that filters out empty lines and comment lines (lines starting
     * with '#'). Leading and trailing whitespace is ignored when deciding.
     */
    private final static ILineFilter COMMENT_AND_EMPTY_LINE_FILTER = new ILineFilter()
        {
            ///////////////////////////////////////////////////////
            // ILineFilter
            ///////////////////////////////////////////////////////

            public boolean acceptLine(String line)
            {
                String trimmed = line.trim();
                return trimmed.length() > 0 && trimmed.startsWith("#") == false;
            }
        };

    private final ILineTokenizer lineTokenizer;

    private IParserObjectFactory<E> factory;

    private IPropertyMapperFactory mapperFactory;

    /** Creates a parser that tokenizes lines with a {@link DefaultLineTokenizer}. */
    public DefaultReaderParser()
    {
        this(new DefaultLineTokenizer());
    }

    /** Creates a parser that tokenizes lines with given <code>lineTokenizer</code>. */
    public DefaultReaderParser(ILineTokenizer lineTokenizer)
    {
        this.lineTokenizer = lineTokenizer;
    }

    /**
     * Converts given <code>tokens</code> into an element using the registered object factory.
     *
     * @throws IllegalStateException if no object factory has been set.
     */
    protected E createObject(String[] tokens)
    {
        return requireFactory().createObject(tokens);
    }

    /**
     * Parses given <code>line</code> into tokens.
     * <p>
     * Uses <code>ILineTokenizer</code> to do its job.
     * </p>
     */
    protected String[] parseLine(int lineNumber, String line)
    {
        return lineTokenizer.tokenize(lineNumber, line);
    }

    /** Inits the parsing by initializing the <code>ILineTokenizer</code>. */
    protected void initParsing()
    {
        lineTokenizer.init();
    }

    /** Returns the registered object factory or fails with a descriptive message if none has been set. */
    private IParserObjectFactory<E> requireFactory()
    {
        if (factory == null)
        {
            throw new IllegalStateException("No object factory has been set: call setObjectFactory() first.");
        }
        return factory;
    }

    /** Returns the header line declared by the mapper factory, or <code>-1</code> if no mapper factory is set. */
    private int getHeaderLine()
    {
        return mapperFactory == null ? -1 : mapperFactory.getHeaderLine();
    }

    ///////////////////////////////////////////////////////
    // IReaderParser
    ///////////////////////////////////////////////////////

    /** Parses given <code>reader</code>, skipping empty lines and comment lines (lines starting with '#'). */
    public final List<E> parse(Reader reader)
    {
        return parse(reader, COMMENT_AND_EMPTY_LINE_FILTER);
    }

    /**
     * Parses given <code>reader</code> line by line.
     * <p>
     * Line numbers start at <code>0</code> and count every line read, including the ones rejected by
     * <code>lineFilter</code>. If the mapper factory declares a non-negative header line, the line with that number
     * is tokenized and used to create the property mapper of the object factory; it does not go through
     * <code>lineFilter</code> and produces no element. Every other line accepted by <code>lineFilter</code> is
     * tokenized and converted into an element.
     * </p>
     * <p>
     * The given <code>reader</code> is not closed by this method.
     * </p>
     *
     * @throws CheckedExceptionTunnel if an <code>IOException</code> occurs while reading.
     * @throws IllegalStateException if an object factory is needed but none has been set.
     */
    public final List<E> parse(Reader reader, ILineFilter lineFilter)
    {
        final BufferedReader bufferedReader;
        if (reader instanceof BufferedReader)
        {
            bufferedReader = (BufferedReader) reader;
        } else
        {
            bufferedReader = new BufferedReader(reader);
        }
        final List<E> elements = new ArrayList<E>();
        // The tokenizer is stateful between init() and destroy(), hence the lock.
        synchronized (lineTokenizer)
        {
            initParsing();
            final int headerLine = getHeaderLine();
            String line;
            try
            {
                for (int lineNumber = 0; (line = bufferedReader.readLine()) != null; lineNumber++)
                {
                    // Only the declared header line configures the mapper; all other lines are data.
                    if (lineNumber == headerLine)
                    {
                        String[] tokens = parseLine(lineNumber, line);
                        requireFactory().setPropertyMapper(mapperFactory.createPropertyMapper(tokens));
                        continue;
                    }
                    if (lineFilter.acceptLine(line))
                    {
                        String[] tokens = parseLine(lineNumber, line);
                        elements.add(createObject(tokens));
                    }
                }
                return elements;
            } catch (IOException ex)
            {
                throw new CheckedExceptionTunnel(ex);
            } finally
            {
                // Destroys line tokenizer.
                lineTokenizer.destroy();
            }
        }
    }

    /** Sets the factory that converts the tokens of a line into an element. */
    public final void setObjectFactory(IParserObjectFactory<E> factory)
    {
        this.factory = factory;
    }

    /** Sets the factory that declares the header line and creates the property mapper out of it. */
    public final void setPropertyMapperFactory(IPropertyMapperFactory mapperFactory)
    {
        this.mapperFactory = mapperFactory;
    }
}
\ No newline at end of file
/*
* Copyright 2007 ETH Zuerich, CISD
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ch.systemsx.cisd.common.parser;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.lang.StringUtils;
/**
 * An <code>IPropertyMapper</code> implementation built from the tokens of a header line.
 * <p>
 * Each header token has the form <code>name</code> or <code>name,format</code> (see
 * {@link #NAME_FORMAT_SEPARATOR}). The position of the token in the header becomes the column of the property.
 * </p>
 *
 * @author Christian Ribeaud
 */
public class HeaderFilePropertyMapper implements IPropertyMapper
{
    /** Separates the property name from its optional format inside a header token. */
    public final static char NAME_FORMAT_SEPARATOR = ',';

    /** The properties found in the header, keyed by property name. */
    private final Map<String, Property> properties;

    HeaderFilePropertyMapper(String[] headerTokens)
    {
        this.properties = tokensToMap(headerTokens);
    }

    /**
     * Converts given header <code>tokens</code> into properties keyed by their name.
     * <p>
     * Tokens that are <code>null</code>, empty or made only of separators carry no name and are skipped; the
     * columns of the remaining properties are unaffected.
     * </p>
     */
    private final static Map<String, Property> tokensToMap(String[] tokens)
    {
        Map<String, Property> map = new HashMap<String, Property>(tokens.length);
        for (int i = 0; i < tokens.length; i++)
        {
            String[] split = StringUtils.split(tokens[i], NAME_FORMAT_SEPARATOR);
            // StringUtils.split returns null for a null token and an empty array for an empty one.
            if (split == null || split.length == 0)
            {
                continue;
            }
            String name = split[0];
            String format = split.length > 1 ? split[1] : null;
            // Key by name (not by the raw token) so that 'name,format' columns are found by getProperty(name).
            map.put(name, new Property(i, name, format));
        }
        return map;
    }

    ///////////////////////////////////////////////////////
    // IPropertyMapper
    ///////////////////////////////////////////////////////

    /** Returns the property with given <code>name</code>, or <code>null</code> if the header does not define it. */
    public Property getProperty(String name)
    {
        return properties.get(name);
    }
}
/*
* Copyright 2007 ETH Zuerich, CISD
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ch.systemsx.cisd.common.parser;
/**
 * Decides, line by line, which lines read by a <code>ReaderParser</code> get parsed.
 *
 * @author Christian Ribeaud
 */
public interface ILineFilter
{
    /** A filter that lets every line through. */
    ILineFilter ALWAYS_ACCEPT_LINE = new ILineFilter()
        {
            ///////////////////////////////////////////////////////
            // ILineFilter
            ///////////////////////////////////////////////////////

            public boolean acceptLine(String line)
            {
                return true;
            }
        };

    /**
     * Tells whether given <code>line</code> should be accepted.
     *
     * @param line the line read from the <code>Reader</code>. Can not be <code>null</code>.
     * @return <code>true</code> if the line should be accepted.
     */
    boolean acceptLine(String line);
}
/*
* Copyright 2007 ETH Zuerich, CISD
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ch.systemsx.cisd.common.parser;
/**
 * Splits a text line into <code>String</code> tokens.
 * <p>
 * Callers are expected to call {@link #init()} before the first line is tokenized and {@link #destroy()} once
 * parsing has finished.
 * </p>
 *
 * @author Christian Ribeaud
 */
public interface ILineTokenizer
{
    /**
     * Prepares this <code>ILineTokenizer</code> for use.
     * <p>
     * Gets called once, right before parsing starts.
     * </p>
     */
    void init();

    /**
     * Splits given <code>line</code> into tokens.
     *
     * @param lineNumber the number of given <code>line</code> in the parsed input.
     * @param line the line to split.
     * @return the tokens found in given <code>line</code>.
     */
    String[] tokenize(int lineNumber, String line);

    /**
     * Releases the resources held by this <code>ILineTokenizer</code>.
     * <p>
     * Gets called once, when parsing has finished.
     * </p>
     */
    void destroy();
}
\ No newline at end of file
/*
* Copyright 2007 ETH Zuerich, CISD
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ch.systemsx.cisd.common.parser;
/**
 * Implementations know how to turn the tokens of a parsed text line into an appropriate <code>Object</code>.
 * <p>
 * An <code>IParserObjectFactory</code> gets an <code>IPropertyMapper</code> through
 * {@link #setPropertyMapper(IPropertyMapper)}; the mapper helps to map <code>Object</code> properties to the given
 * tokens. An <code>IParserObjectFactory</code> is typically registered in {@link IReaderParser}.
 * </p>
 *
 * @author Christian Ribeaud
 */
public interface IParserObjectFactory<E>
{
    /**
     * An <code>IParserObjectFactory</code> that performs no conversion: it hands back the very
     * <code>lineTokens</code> array it receives and ignores any property mapper.
     */
    IParserObjectFactory<String[]> DO_NOTHING_OBJECT_FACTORY = new IParserObjectFactory<String[]>()
        {
            // /////////////////////////////////////////////////////
            // IParserObjectFactory
            // /////////////////////////////////////////////////////

            public void setPropertyMapper(IPropertyMapper propertyMapper)
            {
                // Tokens are returned untouched, so there is nothing to map.
            }

            public String[] createObject(String[] lineTokens)
            {
                return lineTokens;
            }
        };

    /**
     * Converts the tokens of one text line into an appropriate <i>Object</i>.
     *
     * @param lineTokens the tokens of the line.
     */
    E createObject(String[] lineTokens);

    /** Sets the <code>IPropertyMapper</code> this factory may use to map properties to tokens. */
    void setPropertyMapper(IPropertyMapper propertyMapper);
}
\ No newline at end of file
/*
* Copyright 2007 ETH Zuerich, CISD
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ch.systemsx.cisd.common.parser;
import org.apache.commons.lang.builder.ToStringBuilder;
/**
 * Looks up {@link Property} descriptions by property name.
 *
 * @author Christian Ribeaud
 */
public interface IPropertyMapper
{
    /** Returns the <code>Property</code> registered for given <code>name</code>. */
    Property getProperty(String name);

    ///////////////////////////////////////////////////////
    // Helper Classes
    ///////////////////////////////////////////////////////

    /**
     * An immutable description of a property: the column it is bound to, its name and an optional format.
     *
     * @author Christian Ribeaud
     */
    final class Property
    {
        /** The column this property is bound to. */
        public final int column;

        /** The name of this property. */
        public final String name;

        /** The format of this property. */
        public final String format;

        protected Property(int column, String name, String format)
        {
            this.name = name;
            this.format = format;
            this.column = column;
        }

        ///////////////////////////////////////////////////////
        // Object
        ///////////////////////////////////////////////////////

        /** Returns a reflection-based dump of the fields of this property. */
        @Override
        public final String toString()
        {
            return ToStringBuilder.reflectionToString(this);
        }
    }
}
\ No newline at end of file
/*
* Copyright 2007 ETH Zuerich, CISD
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ch.systemsx.cisd.common.parser;
/**
 * Creates an <code>IPropertyMapper</code> out of the tokens of a header line and tells where that header line is
 * to be found.
 *
 * @author Christian Ribeaud
 */
public interface IPropertyMapperFactory
{
    /**
     * Returns the number of the header line. A negative value means that there is no header line.
     */
    int getHeaderLine();

    /**
     * Creates an <code>IPropertyMapper</code> from given <code>tokens</code>.
     *
     * @param tokens the tokens of a tokenized line (presumably the header line).
     */
    IPropertyMapper createPropertyMapper(String[] tokens);
}
/*
* Copyright 2007 ETH Zuerich, CISD
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ch.systemsx.cisd.common.parser;
import java.io.BufferedReader;
import java.io.Reader;
import java.util.List;
import ch.systemsx.cisd.common.exceptions.CheckedExceptionTunnel;
/**
 * An <code>IReaderParser</code> parses a given {@link Reader} and returns the <code>Object</code> instances created
 * from its lines.
 *
 * @author Christian Ribeaud
 */
public interface IReaderParser<E>
{
    /**
     * Parses given <code>Reader</code>, considering only the lines accepted by given <code>lineFilter</code>.
     * <p>
     * The given <code>Reader</code> is wrapped in a {@link BufferedReader} for better performance (if not already
     * done). It is <i>not</i> closed by this method: closing it is the responsibility of the caller.
     * </p>
     *
     * @param lineFilter a filter for the lines found in given <code>reader</code>.
     * @return a <code>List</code> of elements.
     * @throws CheckedExceptionTunnel if an <code>IOException</code> occurs.
     */
    List<E> parse(Reader reader, ILineFilter lineFilter) throws CheckedExceptionTunnel;

    /**
     * Parses given <code>Reader</code>.
     * <p>
     * The given <code>Reader</code> is wrapped in a {@link BufferedReader} for better performance (if not already
     * done). It is <i>not</i> closed by this method: closing it is the responsibility of the caller.
     * </p>
     *
     * @return a <code>List</code> of elements.
     * @throws CheckedExceptionTunnel if an <code>IOException</code> occurs.
     */
    List<E> parse(Reader reader) throws CheckedExceptionTunnel;

    /**
     * Sets the <code>IParserObjectFactory</code>.
     * <p>
     * Typically, the given <code>factory</code> transforms a line into an element.
     * </p>
     */
    void setObjectFactory(IParserObjectFactory<E> factory);

    /** Sets the <code>IPropertyMapperFactory</code>. */
    void setPropertyMapperFactory(IPropertyMapperFactory factory);
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment