Skip to content
Snippets Groups Projects
Commit 6686cd89 authored by tpylak's avatar tpylak
Browse files

SE-180 QIAGEN library transformer

SVN: 14744
parent 05f7f7ab
No related branches found
No related tags found
No related merge requests found
...@@ -30,5 +30,6 @@ ...@@ -30,5 +30,6 @@
<classpathentry kind="lib" path="/libraries/hibernate-search/jms.jar"/> <classpathentry kind="lib" path="/libraries/hibernate-search/jms.jar"/>
<classpathentry kind="lib" path="/libraries/eodsql/eodsql.jar" sourcepath="/libraries/eodsql/eodsql_src.zip"/> <classpathentry kind="lib" path="/libraries/eodsql/eodsql.jar" sourcepath="/libraries/eodsql/eodsql_src.zip"/>
<classpathentry kind="lib" path="/libraries/spring/test/spring-test.jar" sourcepath="/libraries/spring/test/src.jar"/> <classpathentry kind="lib" path="/libraries/spring/test/spring-test.jar" sourcepath="/libraries/spring/test/src.jar"/>
<classpathentry kind="lib" path="/libraries/csv/csv.jar" sourcepath="/libraries/csv/src.zip"/>
<classpathentry kind="output" path="targets/www/WEB-INF/classes"/> <classpathentry kind="output" path="targets/www/WEB-INF/classes"/>
</classpath> </classpath>
/*
* Copyright 2010 ETH Zuerich, CISD
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ch.systemsx.cisd.openbis.plugin.screening.transformers;
import java.util.List;
/**
 * Gives access to the logical fields stored in a single row of a screening library file.
 * <p>
 * Every accessor receives the raw tokens of one row and picks out the value it is responsible
 * for, so that clients do not have to know the column layout of a particular library format.
 * </p>
 * 
 * @author Tomasz Pylak
 */
public interface IScreeningLibraryColumnExtractor
{
    /**
     * @param row tokens of one library row
     * @return code of the plate the row belongs to
     */
    String getPlateCode(String[] row);

    /**
     * @param row tokens of one library row
     * @return code of the well the row describes
     */
    String getWellCode(String[] row);

    /**
     * @param row tokens of one library row
     * @return RNA (nucleotide) sequence of the oligo described by the row
     */
    String getRNASequence(String[] row);

    /**
     * @param row tokens of one library row
     * @return identifier of the oligo as used in the library
     */
    String getOligoId(String[] row);

    /**
     * @param row tokens of one library row
     * @return identifier of the gene as used in the library
     */
    String getGeneId(String[] row);

    /**
     * @param row tokens of one library row
     * @return code of the gene described by the row
     */
    String getGeneCode(String[] row);

    /**
     * @param row tokens of one library row
     * @return description of the gene described by the row
     */
    String getGeneDescription(String[] row);

    /**
     * @return names of the oligo properties which are available in addition to the ones with a
     *         dedicated accessor in this interface
     */
    List<String> getAdditionalOligoPropertyNames();

    /**
     * @param row tokens of one library row
     * @param columnNames names of the additional properties to read, typically the result of
     *            {@link #getAdditionalOligoPropertyNames()}
     * @return values of the requested additional properties found in the row
     */
    List<String> getAdditionalOligoPropertyValues(String[] row, List<String> columnNames);
}
\ No newline at end of file
/*
* Copyright 2010 ETH Zuerich, CISD
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ch.systemsx.cisd.openbis.plugin.screening.transformers;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
/**
 * Creates files to register genes, oligos and plates with wells.
 * <p>
 * Three tab-separated files (<code>genes.txt</code>, <code>oligos.txt</code> and
 * <code>plates.txt</code>) are created using relative paths, i.e. in the current working
 * directory. They are opened by the constructor and stay open until {@link #close()} is called.
 * </p>
 * <p>
 * This registrator works with the assumption that the entities which should be registered do not
 * already exist in openBIS. If they may already exist, the implementation should be extended to
 * fetch the existing entities; the LIBRARY_ID property can be used to recognize them.
 * </p>
 * 
 * @author Tomasz Pylak
 */
public class LibraryEntityRegistrator implements java.io.Closeable
{
    private static final String GENES_FILE_NAME = "genes.txt";

    private static final String OLIGOS_FILE_NAME = "oligos.txt";

    private static final String PLATES_FILE_NAME = "plates.txt";

    private final GeneRegistrator geneRegistrator;

    private final OligoRegistrator oligoRegistrator;

    private final PlateRegistrator plateRegistrator;

    /**
     * Opens the three output files and writes their headers.
     * 
     * @param extractor used to ask for the names of the additional oligo properties which become
     *            extra columns of the oligos file
     * @param experimentIdentifier written into the "experiment" column of every plate
     * @param plateGeometry written into the "$PLATE_GEOMETRY" column of every plate
     * @param groupCode used to build plate identifiers of the form
     *            <code>/groupCode/plateCode</code>
     * @throws IOException if one of the files cannot be created or its header cannot be written;
     *             files which have been opened before the failure are closed again
     */
    public LibraryEntityRegistrator(IScreeningLibraryColumnExtractor extractor,
            String experimentIdentifier, String plateGeometry, String groupCode) throws IOException
    {
        GeneRegistrator genes = null;
        OligoRegistrator oligos = null;
        PlateRegistrator plates = null;
        boolean allOpened = false;
        try
        {
            genes = new GeneRegistrator(new File(GENES_FILE_NAME));
            oligos =
                    new OligoRegistrator(new File(OLIGOS_FILE_NAME), extractor
                            .getAdditionalOligoPropertyNames());
            plates =
                    new PlateRegistrator(new File(PLATES_FILE_NAME), experimentIdentifier,
                            plateGeometry, groupCode);
            allOpened = true;
        } finally
        {
            // do not leak the files opened so far if a later one could not be opened
            if (allOpened == false)
            {
                closeQuietly(genes);
                closeQuietly(oligos);
                closeQuietly(plates);
            }
        }
        this.geneRegistrator = genes;
        this.oligoRegistrator = oligos;
        this.plateRegistrator = plates;
    }

    /**
     * Registers the gene, oligo, plate and well described by one row of the library. Genes,
     * oligos and plates which have been seen before are not written again, a well line is written
     * for every row.
     * 
     * @param extractor extracts the values from the row
     * @param row tokens of one library row
     * @throws IOException if writing to one of the output files fails
     */
    public void register(IScreeningLibraryColumnExtractor extractor, String[] row)
            throws IOException
    {
        String geneId = geneRegistrator.register(extractor, row);
        String oligoId = oligoRegistrator.register(extractor, row, geneId);
        String plateId = plateRegistrator.registerPlate(extractor, row);
        plateRegistrator.registerWell(extractor, row, plateId, oligoId);
    }

    /**
     * Closes all output files. An attempt is made to close every file even if closing one of them
     * fails.
     * 
     * @throws IOException the first failure encountered while closing the files
     */
    public void close() throws IOException
    {
        IOException firstFailure = null;
        AbstractMetadataRegistrator[] registrators =
            { geneRegistrator, oligoRegistrator, plateRegistrator };
        for (AbstractMetadataRegistrator registrator : registrators)
        {
            try
            {
                registrator.close();
            } catch (IOException ex)
            {
                if (firstFailure == null)
                {
                    firstFailure = ex;
                }
            }
        }
        if (firstFailure != null)
        {
            throw firstFailure;
        }
    }

    private static void closeQuietly(AbstractMetadataRegistrator registratorOrNull)
    {
        if (registratorOrNull != null)
        {
            registratorOrNull.closeQuietly();
        }
    }

    /** Base class of the writers: owns the output stream of one tab-separated file. */
    protected abstract static class AbstractMetadataRegistrator
    {
        private static final String TAB = "\t";

        private final OutputStream stream;

        protected AbstractMetadataRegistrator(File file) throws FileNotFoundException
        {
            this.stream = new FileOutputStream(file);
        }

        /** Writes one line consisting of the specified tokens separated by tabs. */
        protected void writeLine(String... tokens) throws IOException
        {
            writeLine(join(tokens));
        }

        /** Joins tokens into one line adding separators in between. */
        public static String join(String... tokens)
        {
            return StringUtils.join(tokens, TAB);
        }

        private void writeLine(String line) throws IOException
        {
            IOUtils.writeLines(Arrays.asList(line), "\n", stream);
        }

        /** Closes the underlying file. */
        protected void close() throws IOException
        {
            stream.close();
        }

        /** Closes the underlying file ignoring any failure; meant for clean-up after an error. */
        protected void closeQuietly()
        {
            IOUtils.closeQuietly(stream);
        }
    }

    /** Writes plates and their wells into one file which consists of alternating sections. */
    private static class PlateRegistrator extends AbstractMetadataRegistrator
    {
        private static final String HEADER_PLATES =
                "[PLATE]\n" + join("identifier", "experiment", "$PLATE_GEOMETRY");

        private static final String HEADER_OLIGOS =
                "[OLIGO_WELL]\n" + join("identifier", "container", "OLIGO");

        private final Set<String/* plate code */> registeredPlates;

        private final String experimentIdentifier;

        private final String plateGeometry;

        private final String groupCode;

        // we register wells and plates in the same file. This flag tells us in which section we
        // are, the one for plates or one for wells
        private boolean lastRegisteredWasWell;

        public PlateRegistrator(File outputFile, String experimentIdentifier, String plateGeometry,
                String groupCode) throws IOException
        {
            super(outputFile);
            this.experimentIdentifier = experimentIdentifier;
            this.plateGeometry = plateGeometry;
            this.groupCode = groupCode;
            this.registeredPlates = new HashSet<String>();
            this.lastRegisteredWasWell = false;
            writeLine(HEADER_PLATES);
        }

        /**
         * Writes the plate of the row unless a plate with the same code has been written before.
         * 
         * @return sampleIdentifier of the plate
         */
        public String registerPlate(IScreeningLibraryColumnExtractor extractor, String[] row)
                throws IOException
        {
            String plateCode = extractor.getPlateCode(row);
            String sampleIdentifier = getSampleIdentifier(plateCode);
            if (registeredPlates.contains(plateCode) == false)
            {
                if (lastRegisteredWasWell)
                {
                    // switch back from the wells section to a new plates section
                    lastRegisteredWasWell = false;
                    writeLine(HEADER_PLATES);
                }
                writeLine(sampleIdentifier, experimentIdentifier, plateGeometry);
                registeredPlates.add(plateCode);
            }
            return sampleIdentifier;
        }

        private String getSampleIdentifier(String plateCode)
        {
            return "/" + groupCode + "/" + plateCode;
        }

        /**
         * Writes the well of the row; the well identifier has the form
         * <code>plateId:wellCode</code>.
         */
        public void registerWell(IScreeningLibraryColumnExtractor extractor, String[] row,
                String plateId, String oligoId) throws IOException
        {
            if (lastRegisteredWasWell == false)
            {
                // switch from the plates section to a new wells section
                lastRegisteredWasWell = true;
                writeLine(HEADER_OLIGOS);
            }
            String wellCode = extractor.getWellCode(row);
            String wellIdentifier = plateId + ":" + wellCode;
            String oligoMaterialProperty = oligoId + " (OLIGO)";
            writeLine(wellIdentifier, plateId, oligoMaterialProperty);
        }
    }

    /** Writes every gene once. */
    private static class GeneRegistrator extends AbstractMetadataRegistrator
    {
        private static final String HEADER = join("CODE", "DESCRIPTION", "LIBRARY_ID");

        // NOTE(review): gene codes are compared case-sensitively here whereas oligo codes are
        // compared case-insensitively in OligoRegistrator - confirm that this is intended
        private final Set<String/* gene code */> registeredGenes;

        public GeneRegistrator(File genesFile) throws IOException
        {
            super(genesFile);
            this.registeredGenes = new HashSet<String>();
            writeLine(HEADER);
        }

        /**
         * Writes the gene of the row unless a gene with the same code has been written before.
         * 
         * @return gene id (the gene code)
         */
        public String register(IScreeningLibraryColumnExtractor extractor, String[] row)
                throws IOException
        {
            String geneSymbol = extractor.getGeneCode(row);
            if (registeredGenes.contains(geneSymbol) == false)
            {
                String desc = extractor.getGeneDescription(row);
                String libraryId = extractor.getGeneId(row);
                writeLine(geneSymbol, desc, libraryId);
                registeredGenes.add(geneSymbol);
            }
            return geneSymbol;
        }
    }

    /** Writes every oligo once, together with its additional properties. */
    private static class OligoRegistrator extends AbstractMetadataRegistrator
    {
        private static final String HEADER =
                join("CODE", "NUCLEOTIDE_SEQUENCE", "INHIBITOR_OF", "LIBRARY_ID");

        private final Set<String/* lower-cased code */> registeredOligos;

        private final List<String> additionalPropertyNames;

        public OligoRegistrator(File file, List<String> additionalPropertyNames) throws IOException
        {
            super(file);
            this.registeredOligos = new HashSet<String>();
            this.additionalPropertyNames = additionalPropertyNames;
            writeLine(createHeader(additionalPropertyNames));
        }

        private static String createHeader(List<String> additionalPropertyNames)
        {
            String header = HEADER;
            for (String propertyName : additionalPropertyNames)
            {
                header = join(header, propertyName);
            }
            return header;
        }

        /**
         * Writes the oligo of the row unless an oligo with the same code (ignoring case) has been
         * written before. The code has the form <code>geneCode_oligoId</code>.
         * 
         * @return openbis id of the oligo
         */
        public String register(IScreeningLibraryColumnExtractor extractor, String[] row,
                String inhibitedGeneCode) throws IOException
        {
            String geneSymbol = extractor.getGeneCode(row);
            String oligoId = extractor.getOligoId(row);
            String openbisOligoId = geneSymbol + "_" + oligoId;
            if (containsCaseInsensitive(registeredOligos, openbisOligoId) == false)
            {
                String seq = extractor.getRNASequence(row);
                String geneMaterialProperty = inhibitedGeneCode + " (GENE)";
                String line = join(openbisOligoId, seq, geneMaterialProperty, oligoId);
                // add additional properties
                List<String> propertyValues =
                        extractor.getAdditionalOligoPropertyValues(row, additionalPropertyNames);
                for (String propertyValue : propertyValues)
                {
                    line = join(line, propertyValue);
                }
                writeLine(line);
                addCaseInsensitive(registeredOligos, openbisOligoId);
            }
            return openbisOligoId;
        }

        private void addCaseInsensitive(Set<String> set, String value)
        {
            set.add(value.toLowerCase());
        }

        private boolean containsCaseInsensitive(Set<String> set, String value)
        {
            return set.contains(value.toLowerCase());
        }
    }
}
/*
* Copyright 2010 ETH Zuerich, CISD
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ch.systemsx.cisd.openbis.plugin.screening.transformers;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import ch.systemsx.cisd.common.collections.CollectionUtils;
import ch.systemsx.cisd.common.exceptions.UserFailureException;
/**
 * Provides structured information from one row of the QIAGEN library.
 * <p>
 * The header of the library has to contain the columns <code>barcode</code>, <code>row</code>,
 * <code>col</code>, <code>sirna</code>, <code>geneId</code>, <code>symbol</code>,
 * <code>description</code> and <code>productId</code> (names are matched ignoring case). All other
 * columns are exposed as additional oligo properties, in the order in which they appear in the
 * header.
 * </p>
 * 
 * @author Tomasz Pylak
 */
public class QiagenScreeningLibraryColumnExtractor implements IScreeningLibraryColumnExtractor
{
    // ----- column names

    private static final String PLATE_NAME = "barcode";

    private static final String WELL_ROW = "row";

    private static final String WELL_COL = "col";

    // gene

    private static final String GENE_ID = "geneId";

    private static final String GENE_SYMBOL = "symbol";

    private static final String GENE_DESC = "description";

    // oligo

    private static final String RNA_SEQUENCE = "sirna";

    private static final String OLIGO_ID = "productId";

    private static final String[] ALL_COLUMNS =
            new String[]
                { PLATE_NAME, WELL_ROW, WELL_COL, RNA_SEQUENCE, GENE_ID, GENE_SYMBOL, GENE_DESC,
                        OLIGO_ID };

    // -------------

    private final Map<String/* column name */, Integer/* index in the header table */> columnIndices;

    private final Map<String/* column name */, Integer/* index in the header table */> unknownColumnIndices;

    /**
     * @param headerTokens column names of the library file
     * @throws UserFailureException if one of the required columns is missing
     */
    public QiagenScreeningLibraryColumnExtractor(String[] headerTokens)
    {
        this.columnIndices = createColumnIndex(headerTokens);
        this.unknownColumnIndices = getOmittedIndices(columnIndices, headerTokens);
    }

    /** @return names of all columns which are not required columns, in header order */
    public List<String> getAdditionalOligoPropertyNames()
    {
        return new ArrayList<String>(unknownColumnIndices.keySet());
    }

    // ------------

    /** Collects the header columns whose index is not among the values of the given index. */
    private static Map<String, Integer> getOmittedIndices(Map<String, Integer> columnIndex,
            String[] headers)
    {
        // insertion-ordered map, so that additional columns keep the order of the input file
        Map<String, Integer> omittedIndices = new java.util.LinkedHashMap<String, Integer>();
        Set<Integer> knownIndices = new HashSet<Integer>(columnIndex.values());
        for (int i = 0; i < headers.length; i++)
        {
            if (knownIndices.contains(i) == false)
            {
                omittedIndices.put(headers[i], i);
            }
        }
        return omittedIndices;
    }

    private static Map<String, Integer> createColumnIndex(String[] headers)
    {
        Map<String, Integer> map = new HashMap<String, Integer>();
        for (String columnName : ALL_COLUMNS)
        {
            findAndPut(map, headers, columnName);
        }
        return map;
    }

    private static void findAndPut(Map<String, Integer> map, String[] headers, String columnName)
    {
        int ix = findIndexOrDie(headers, columnName);
        map.put(columnName, ix);
    }

    private static int findIndexOrDie(String[] headers, String columnName)
    {
        for (int i = 0; i < headers.length; i++)
        {
            if (headers[i].equalsIgnoreCase(columnName))
            {
                return i;
            }
        }
        throw new UserFailureException("Column " + columnName + " does not exist in "
                + CollectionUtils.abbreviate(headers, -1));
    }

    private String getValue(String[] row, String columnName)
    {
        // all required columns have been found by the constructor, so the index is never null
        int ix = columnIndices.get(columnName);
        return valueAt(row, ix);
    }

    /** @return the token at the given index or an empty string if the row is too short */
    private static String valueAt(String[] row, int ix)
    {
        return ix < row.length ? row[ix] : "";
    }

    /**
     * Replaces every character which is not a letter, a digit, '.', '-' or '_' by an underscore.
     */
    private static String asCode(String value)
    {
        StringBuilder code = new StringBuilder(value.length());
        for (int i = 0; i < value.length(); i++)
        {
            char ch = value.charAt(i);
            code.append(isValidCodeCharacter(ch) ? ch : '_');
        }
        return code.toString();
    }

    private static boolean isValidCodeCharacter(char ch)
    {
        return Character.isLetterOrDigit(ch) || ch == '.' || ch == '-' || ch == '_';
    }

    private String getCodeValue(String[] row, String columnName)
    {
        return asCode(getValue(row, columnName));
    }

    // ------------

    public String getPlateCode(String[] row)
    {
        return getCodeValue(row, PLATE_NAME);
    }

    /** @return concatenation of the values of the <code>row</code> and <code>col</code> columns */
    public String getWellCode(String[] row)
    {
        String wellRow = getValue(row, WELL_ROW);
        String wellCol = getValue(row, WELL_COL);
        return wellRow + wellCol;
    }

    public String getRNASequence(String[] row)
    {
        return getValue(row, RNA_SEQUENCE);
    }

    public String getOligoId(String[] row)
    {
        return getValue(row, OLIGO_ID);
    }

    public String getGeneId(String[] row)
    {
        return getValue(row, GENE_ID);
    }

    public String getGeneCode(String[] row)
    {
        return getCodeValue(row, GENE_SYMBOL);
    }

    public String getGeneDescription(String[] row)
    {
        return getValue(row, GENE_DESC);
    }

    /**
     * @param row tokens of one library row
     * @param columnNames names of additional columns, as returned by
     *            {@link #getAdditionalOligoPropertyNames()}
     * @return values of the specified columns, in the order of <code>columnNames</code>; an empty
     *         string is used when the row is too short to contain a column
     * @throws IllegalArgumentException if a name is not the name of an additional column
     */
    public List<String> getAdditionalOligoPropertyValues(String[] row, List<String> columnNames)
    {
        List<String> values = new ArrayList<String>(columnNames.size());
        for (String columnName : columnNames)
        {
            Integer ix = unknownColumnIndices.get(columnName);
            if (ix == null)
            {
                // fail with a meaningful message instead of a NullPointerException on unboxing
                throw new IllegalArgumentException("Unknown additional column: " + columnName);
            }
            values.add(valueAt(row, ix));
        }
        return values;
    }
}
/*
* Copyright 2010 ETH Zuerich, CISD
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ch.systemsx.cisd.openbis.plugin.screening.transformers;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.charset.Charset;
import com.csvreader.CsvReader;
/**
 * Transforms a screening library file and produces files which can be uploaded to openBIS: genes,
 * oligos and plates with wells.
 * <p>
 * The library file is expected to be comma-separated with a header in the first record. It is read
 * using the default charset of the platform.
 * </p>
 * 
 * @author Tomasz Pylak
 */
public class ScreeningLibraryTransformer
{
    private static final char SEPARATOR = ',';

    /**
     * Command line entry point. Terminates the JVM with exit code 1 if the arguments are invalid.
     * 
     * @param args exactly four values: <code>&lt;master-plate-file-path&gt;
     *            &lt;experiment-identifier&gt; &lt;plate-geometry&gt; &lt;group&gt;</code>
     */
    public static void main(String[] args) throws FileNotFoundException, IOException
    {
        if (args.length != 4)
        {
            error("Invalid parameters. Expected: "
                    + "<master-plate-file-path> <experiment-identifier> <plate-geometry> <group>");
        }
        CsvReader csvReader = readFile(args[0]);
        try
        {
            String experimentIdentifier = args[1];
            String plateGeometry = args[2];
            String groupCode = args[3];
            readLibrary(csvReader, experimentIdentifier, plateGeometry, groupCode);
        } finally
        {
            // release the input file also when the transformation fails
            csvReader.close();
        }
    }

    /** Reads the header and passes every following record to the registrator. */
    private static void readLibrary(CsvReader csvReader, String experimentIdentifier,
            String plateGeometry, String groupCode) throws IOException
    {
        System.out.println("Processing...");
        boolean headerPresent = csvReader.readRecord();
        if (headerPresent == false)
        {
            error("header not found");
            return;
        }
        String[] headers = csvReader.getValues();
        IScreeningLibraryColumnExtractor extractor =
                new QiagenScreeningLibraryColumnExtractor(headers);
        LibraryEntityRegistrator registrator =
                new LibraryEntityRegistrator(extractor, experimentIdentifier, plateGeometry,
                        groupCode);
        while (csvReader.readRecord())
        {
            String[] row = csvReader.getValues();
            registrator.register(extractor, row);
        }
        System.out.println("Done, look for results in " + new File(".").getAbsolutePath());
    }

    /**
     * Opens the library file for reading. Terminates the JVM with exit code 1 if the path does not
     * point to an existing file.
     */
    private static CsvReader readFile(String path) throws FileNotFoundException, IOException
    {
        File masterPlatesFile = new File(path);
        if (masterPlatesFile.isFile() == false)
        {
            error(masterPlatesFile + " does not exist or is not a file.");
        }
        FileInputStream fileInputStream = new FileInputStream(masterPlatesFile);
        boolean readerCreated = false;
        try
        {
            CsvReader csvReader = new CsvReader(fileInputStream, Charset.defaultCharset());
            csvReader.setDelimiter(SEPARATOR);
            // NOTE(review): presumably lifts the reader's limits on record size - confirm
            // against the CsvReader documentation
            csvReader.setSafetySwitch(false);
            readerCreated = true;
            return csvReader;
        } finally
        {
            // do not leak the stream if the reader could not be set up
            if (readerCreated == false)
            {
                fileInputStream.close();
            }
        }
    }

    /** Prints the message to the standard error stream and terminates the JVM. */
    private static void error(String msg)
    {
        System.err.println(msg);
        System.exit(1);
    }
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment