diff --git a/screening/.classpath b/screening/.classpath
index 817753190aaab4caf3fa0373b0b6b5d239349076..87afe39d49ea46a449310ecc50eef545237c08ba 100644
--- a/screening/.classpath
+++ b/screening/.classpath
@@ -30,5 +30,6 @@
 	<classpathentry kind="lib" path="/libraries/hibernate-search/jms.jar"/>
 	<classpathentry kind="lib" path="/libraries/eodsql/eodsql.jar" sourcepath="/libraries/eodsql/eodsql_src.zip"/>
 	<classpathentry kind="lib" path="/libraries/spring/test/spring-test.jar" sourcepath="/libraries/spring/test/src.jar"/>
+	<classpathentry kind="lib" path="/libraries/csv/csv.jar" sourcepath="/libraries/csv/src.zip"/>
 	<classpathentry kind="output" path="targets/www/WEB-INF/classes"/>
 </classpath>
diff --git a/screening/source/java/ch/systemsx/cisd/openbis/plugin/screening/transformers/IScreeningLibraryColumnExtractor.java b/screening/source/java/ch/systemsx/cisd/openbis/plugin/screening/transformers/IScreeningLibraryColumnExtractor.java
new file mode 100644
index 0000000000000000000000000000000000000000..a7f888c1fb811fea0fc664a86dc86461e8f6d2f7
--- /dev/null
+++ b/screening/source/java/ch/systemsx/cisd/openbis/plugin/screening/transformers/IScreeningLibraryColumnExtractor.java
@@ -0,0 +1,62 @@
+/*
+ * Copyright 2010 ETH Zuerich, CISD
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package ch.systemsx.cisd.openbis.plugin.screening.transformers;
+
+import java.util.List;
+
+/**
+ * Provides structured information from one row of the library.
+ * <p>
+ * A row is the array of cell values of one record of the library file; implementations decide
+ * which position holds which piece of information.
+ * </p>
+ *
+ * @author Tomasz Pylak
+ */
+public interface IScreeningLibraryColumnExtractor
+{
+    /** @return code of the plate described by the given row */
+    public String getPlateCode(String[] row);
+
+    /** @return code of the well described by the given row */
+    public String getWellCode(String[] row);
+
+    /** @return nucleotide sequence of the oligo described by the given row */
+    public String getRNASequence(String[] row);
+
+    /** @return identifier which the library uses for the oligo described by the given row */
+    public String getOligoId(String[] row);
+
+    /** @return identifier which the library uses for the gene described by the given row */
+    public String getGeneId(String[] row);
+
+    /** @return code of the gene described by the given row */
+    public String getGeneCode(String[] row);
+
+    /** @return description of the gene described by the given row */
+    public String getGeneDescription(String[] row);
+
+    /** @return names of the oligo properties which have no dedicated getter */
+    public List<String> getAdditionalOligoPropertyNames();
+
+    /**
+     * @param columnNames names returned by {@link #getAdditionalOligoPropertyNames()}
+     * @return values of the specified additional oligo properties in the given row, in the same
+     *         order as <var>columnNames</var>
+     */
+    public List<String> getAdditionalOligoPropertyValues(String[] row, List<String> columnNames);
+}
diff --git a/screening/source/java/ch/systemsx/cisd/openbis/plugin/screening/transformers/LibraryEntityRegistrator.java b/screening/source/java/ch/systemsx/cisd/openbis/plugin/screening/transformers/LibraryEntityRegistrator.java
new file mode 100644
index 0000000000000000000000000000000000000000..c4fdfd53d948d831afbb5d09b27e4e9c0d25fa9a
--- /dev/null
+++ b/screening/source/java/ch/systemsx/cisd/openbis/plugin/screening/transformers/LibraryEntityRegistrator.java
@@ -0,0 +1,327 @@
+/*
+ * Copyright 2010 ETH Zuerich, CISD
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package ch.systemsx.cisd.openbis.plugin.screening.transformers;
+
+import java.io.Closeable;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Locale;
+import java.util.Set;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang.StringUtils;
+
+/**
+ * Creates files to register genes, oligos and plates with wells.
+ * <p>
+ * This registrator works with the assumption that the entities which should be registered do not
+ * already exist in openBIS. If they do exist, the implementation should be extended to fetch the
+ * existing entities; the LIBRARY_ID property can be used to recognize them.
+ * </p>
+ * <p>
+ * The output files are opened by the constructor. Call {@link #close()} after the last row has
+ * been registered to release them.
+ * </p>
+ *
+ * @author Tomasz Pylak
+ */
+public class LibraryEntityRegistrator implements Closeable
+{
+    private static final String GENES_FILE_NAME = "genes.txt";
+
+    private static final String OLIGOS_FILE_NAME = "oligos.txt";
+
+    private static final String PLATES_FILE_NAME = "plates.txt";
+
+    private final GeneRegistrator geneRegistrator;
+
+    private final OligoRegistrator oligoRegistrator;
+
+    private final PlateRegistrator plateRegistrator;
+
+    /**
+     * Creates the output files in the current working directory and writes their headers.
+     *
+     * @param extractor provides the names of the additional oligo columns
+     * @param experimentIdentifier written into the experiment column of every plate
+     * @param plateGeometry written into the $PLATE_GEOMETRY column of every plate
+     * @param groupCode first part of every plate identifier
+     * @throws IOException if an output file cannot be created or written
+     */
+    public LibraryEntityRegistrator(IScreeningLibraryColumnExtractor extractor,
+            String experimentIdentifier, String plateGeometry, String groupCode) throws IOException
+    {
+        this.geneRegistrator = new GeneRegistrator(new File(GENES_FILE_NAME));
+        this.oligoRegistrator =
+                new OligoRegistrator(new File(OLIGOS_FILE_NAME), extractor
+                        .getAdditionalOligoPropertyNames());
+        this.plateRegistrator =
+                new PlateRegistrator(new File(PLATES_FILE_NAME), experimentIdentifier,
+                        plateGeometry, groupCode);
+    }
+
+    /**
+     * Registers the gene, oligo, plate and well described by one row of the library. Genes, oligos
+     * and plates which have been registered before are not written again.
+     */
+    public void register(IScreeningLibraryColumnExtractor extractor, String[] row)
+            throws IOException
+    {
+        String geneId = geneRegistrator.register(extractor, row);
+        String oligoId = oligoRegistrator.register(extractor, row, geneId);
+        String plateId = plateRegistrator.registerPlate(extractor, row);
+        plateRegistrator.registerWell(extractor, row, plateId, oligoId);
+    }
+
+    /**
+     * Closes all output files. All of them are closed even if closing one of them fails.
+     */
+    public void close() throws IOException
+    {
+        try
+        {
+            geneRegistrator.close();
+        } finally
+        {
+            try
+            {
+                oligoRegistrator.close();
+            } finally
+            {
+                plateRegistrator.close();
+            }
+        }
+    }
+
+    abstract static protected class AbstractMetadataRegistrator
+    {
+        private static final String TAB = "\t";
+
+        private final OutputStream stream;
+
+        protected AbstractMetadataRegistrator(File file) throws FileNotFoundException
+        {
+            this.stream = new FileOutputStream(file);
+        }
+
+        /** Writes the tokens as one tab separated line. */
+        protected void writeLine(String... tokens) throws IOException
+        {
+            writeRawLine(join(tokens));
+        }
+
+        /** Joins tokens into one line adding separators in between. */
+        public static String join(String... tokens)
+        {
+            return StringUtils.join(tokens, TAB);
+        }
+
+        /** Releases the output file. */
+        public void close() throws IOException
+        {
+            stream.close();
+        }
+
+        private void writeRawLine(String line) throws IOException
+        {
+            IOUtils.writeLines(Arrays.asList(line), "\n", stream);
+        }
+    }
+
+    private static class PlateRegistrator extends AbstractMetadataRegistrator
+    {
+        private static final String HEADER_PLATES =
+                "[PLATE]\n" + join("identifier", "experiment", "$PLATE_GEOMETRY");
+
+        private static final String HEADER_WELLS =
+                "[OLIGO_WELL]\n" + join("identifier", "container", "OLIGO");
+
+        private final Set<String/* plate code */> registeredPlates;
+
+        private final String experimentIdentifier;
+
+        private final String plateGeometry;
+
+        private final String groupCode;
+
+        // We register wells and plates in the same file. This flag tells us in which section we
+        // are, the one for plates or the one for wells.
+        private boolean lastRegisteredWasWell;
+
+        public PlateRegistrator(File outputFile, String experimentIdentifier, String plateGeometry,
+                String groupCode) throws IOException
+        {
+            super(outputFile);
+            this.experimentIdentifier = experimentIdentifier;
+            this.plateGeometry = plateGeometry;
+            this.groupCode = groupCode;
+            this.registeredPlates = new HashSet<String>();
+            this.lastRegisteredWasWell = false;
+            writeLine(HEADER_PLATES);
+        }
+
+        /**
+         * Writes the plate of the row unless a plate with the same code has been written before.
+         *
+         * @return sample identifier of the plate
+         */
+        public String registerPlate(IScreeningLibraryColumnExtractor extractor, String[] row)
+                throws IOException
+        {
+            String plateCode = extractor.getPlateCode(row);
+            String sampleIdentifier = getSampleIdentifier(plateCode);
+            if (registeredPlates.contains(plateCode) == false)
+            {
+                if (lastRegisteredWasWell)
+                {
+                    // the previous line belongs to the wells section, start a new plates section
+                    lastRegisteredWasWell = false;
+                    writeLine(HEADER_PLATES);
+                }
+                writeLine(sampleIdentifier, experimentIdentifier, plateGeometry);
+                registeredPlates.add(plateCode);
+            }
+            return sampleIdentifier;
+        }
+
+        private String getSampleIdentifier(String plateCode)
+        {
+            return "/" + groupCode + "/" + plateCode;
+        }
+
+        /** Writes the well of the row together with the oligo it contains. */
+        public void registerWell(IScreeningLibraryColumnExtractor extractor, String[] row,
+                String plateId, String oligoId) throws IOException
+        {
+            if (lastRegisteredWasWell == false)
+            {
+                // the previous line belongs to the plates section, start a new wells section
+                lastRegisteredWasWell = true;
+                writeLine(HEADER_WELLS);
+            }
+            String wellCode = extractor.getWellCode(row);
+            String wellIdentifier = plateId + ":" + wellCode;
+            String oligoMaterialProperty = oligoId + " (OLIGO)";
+            writeLine(wellIdentifier, plateId, oligoMaterialProperty);
+        }
+    }
+
+    private static class GeneRegistrator extends AbstractMetadataRegistrator
+    {
+        private static final String HEADER = join("CODE", "DESCRIPTION", "LIBRARY_ID");
+
+        private final Set<String/* gene code */> registeredGenes;
+
+        public GeneRegistrator(File genesFile) throws IOException
+        {
+            super(genesFile);
+            this.registeredGenes = new HashSet<String>();
+            writeLine(HEADER);
+        }
+
+        /**
+         * Writes the gene of the row unless a gene with the same code has been written before.
+         *
+         * @return gene code
+         */
+        public String register(IScreeningLibraryColumnExtractor extractor, String[] row)
+                throws IOException
+        {
+            String geneSymbol = extractor.getGeneCode(row);
+            if (registeredGenes.contains(geneSymbol) == false)
+            {
+                String desc = extractor.getGeneDescription(row);
+                String libraryId = extractor.getGeneId(row);
+                writeLine(geneSymbol, desc, libraryId);
+                registeredGenes.add(geneSymbol);
+            }
+            return geneSymbol;
+        }
+    }
+
+    private static class OligoRegistrator extends AbstractMetadataRegistrator
+    {
+        private static final String HEADER =
+                join("CODE", "NUCLEOTIDE_SEQUENCE", "INHIBITOR_OF", "LIBRARY_ID");
+
+        private final Set<String/* lower case code */> registeredOligos;
+
+        private final List<String> additionalPropertyNames;
+
+        public OligoRegistrator(File file, List<String> additionalPropertyNames) throws IOException
+        {
+            super(file);
+            this.registeredOligos = new HashSet<String>();
+            this.additionalPropertyNames = additionalPropertyNames;
+            writeLine(createHeader(additionalPropertyNames));
+        }
+
+        private static String createHeader(List<String> additionalPropertyNames)
+        {
+            String header = HEADER;
+            for (String propertyName : additionalPropertyNames)
+            {
+                header = join(header, propertyName);
+            }
+            return header;
+        }
+
+        /**
+         * Writes the oligo of the row unless an oligo with the same code (ignoring case) has been
+         * written before.
+         *
+         * @return openBIS code of the oligo
+         */
+        public String register(IScreeningLibraryColumnExtractor extractor, String[] row,
+                String inhibitedGeneCode) throws IOException
+        {
+            String geneSymbol = extractor.getGeneCode(row);
+            String oligoId = extractor.getOligoId(row);
+            String openbisOligoId = geneSymbol + "_" + oligoId;
+            if (registeredOligos.contains(normalize(openbisOligoId)) == false)
+            {
+                String seq = extractor.getRNASequence(row);
+                String geneMaterialProperty = inhibitedGeneCode + " (GENE)";
+                String line = join(openbisOligoId, seq, geneMaterialProperty, oligoId);
+                // add additional properties
+                List<String> propertyValues =
+                        extractor.getAdditionalOligoPropertyValues(row, additionalPropertyNames);
+                for (String propertyValue : propertyValues)
+                {
+                    line = join(line, propertyValue);
+                }
+
+                writeLine(line);
+                registeredOligos.add(normalize(openbisOligoId));
+            }
+            return openbisOligoId;
+        }
+
+        // Locale independent lower-casing: with the platform default locale the result would
+        // differ on e.g. a Turkish system, where "I" is not lower-cased to "i".
+        private static String normalize(String code)
+        {
+            return code.toLowerCase(Locale.ENGLISH);
+        }
+    }
+}
diff --git a/screening/source/java/ch/systemsx/cisd/openbis/plugin/screening/transformers/QiagenScreeningLibraryColumnExtractor.java b/screening/source/java/ch/systemsx/cisd/openbis/plugin/screening/transformers/QiagenScreeningLibraryColumnExtractor.java
new file mode 100644
index 0000000000000000000000000000000000000000..461b93b0823630bc48fb68db5ae71fd02a910dd1
--- /dev/null
+++ b/screening/source/java/ch/systemsx/cisd/openbis/plugin/screening/transformers/QiagenScreeningLibraryColumnExtractor.java
@@ -0,0 +1,220 @@
+/*
+ * Copyright 2010 ETH Zuerich, CISD
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package ch.systemsx.cisd.openbis.plugin.screening.transformers;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import ch.systemsx.cisd.common.collections.CollectionUtils;
+import ch.systemsx.cisd.common.exceptions.UserFailureException;
+
+/**
+ * Provides structured information from one row of the QIAGEN library.
+ * <p>
+ * The columns are located by their names in the header row. Columns with names not known to this
+ * class are exposed as additional oligo properties.
+ * </p>
+ *
+ * @author Tomasz Pylak
+ */
+public class QiagenScreeningLibraryColumnExtractor implements IScreeningLibraryColumnExtractor
+{
+    // ----- column names
+
+    private final static String PLATE_NAME = "barcode";
+
+    private final static String WELL_ROW = "row";
+
+    private final static String WELL_COL = "col";
+
+    // gene
+
+    private final static String GENE_ID = "geneId";
+
+    private final static String GENE_SYMBOL = "symbol";
+
+    private final static String GENE_DESC = "description";
+
+    // oligo
+
+    private final static String RNA_SEQUENCE = "sirna";
+
+    private final static String OLIGO_ID = "productId";
+
+    private final static String[] ALL_COLUMNS = new String[]
+        { PLATE_NAME, WELL_ROW, WELL_COL, RNA_SEQUENCE, GENE_ID, GENE_SYMBOL, GENE_DESC, OLIGO_ID };
+
+    // -------------
+
+    private final Map<String/* column name */, Integer/* index in the header table */> columnIndices;
+
+    // keeps the order of the columns in the header
+    private final Map<String/* column name */, Integer/* index in the header table */> unknownColumnIndices;
+
+    /**
+     * @param headerTokens column names of the library file
+     * @throws UserFailureException if one of the mandatory columns is missing
+     */
+    public QiagenScreeningLibraryColumnExtractor(String[] headerTokens)
+    {
+        this.columnIndices = createColumnIndex(headerTokens);
+        this.unknownColumnIndices = getOmittedIndices(columnIndices, headerTokens);
+    }
+
+    public List<String> getAdditionalOligoPropertyNames()
+    {
+        return new ArrayList<String>(unknownColumnIndices.keySet());
+    }
+
+    // ------------
+
+    private static Map<String, Integer> getOmittedIndices(Map<String, Integer> columnIndex,
+            String[] headers)
+    {
+        // LinkedHashMap: additional columns are reported in the order in which they appear in the
+        // library file instead of an arbitrary hash order
+        Map<String, Integer> omittedIndices = new LinkedHashMap<String, Integer>();
+        Set<Integer> knownIndices = new HashSet<Integer>(columnIndex.values());
+        for (int i = 0; i < headers.length; i++)
+        {
+            if (knownIndices.contains(i) == false)
+            {
+                omittedIndices.put(headers[i], i);
+            }
+        }
+        return omittedIndices;
+    }
+
+    private static Map<String, Integer> createColumnIndex(String[] headers)
+    {
+        Map<String, Integer> map = new HashMap<String, Integer>();
+        for (String columnName : ALL_COLUMNS)
+        {
+            map.put(columnName, findIndexOrDie(headers, columnName));
+        }
+        return map;
+    }
+
+    private static int findIndexOrDie(String[] headers, String columnName)
+    {
+        for (int i = 0; i < headers.length; i++)
+        {
+            if (headers[i].equalsIgnoreCase(columnName))
+            {
+                return i;
+            }
+        }
+        throw new UserFailureException("Column " + columnName + " does not exist in "
+                + CollectionUtils.abbreviate(headers, -1));
+    }
+
+    private String getValue(String[] row, String columnName)
+    {
+        return valueAt(row, columnIndices.get(columnName));
+    }
+
+    // rows shorter than the header are treated as if the missing cells were empty
+    private static String valueAt(String[] row, int ix)
+    {
+        return ix < row.length ? row[ix] : "";
+    }
+
+    // replaces all characters which are not allowed in a code by an underscore
+    private static String asCode(String value)
+    {
+        StringBuilder code = new StringBuilder(value.length());
+        for (int i = 0; i < value.length(); i++)
+        {
+            char ch = value.charAt(i);
+            code.append(isValidCodeCharacter(ch) ? ch : '_');
+        }
+        return code.toString();
+    }
+
+    private static boolean isValidCodeCharacter(char ch)
+    {
+        return Character.isLetterOrDigit(ch) || ch == '.' || ch == '-' || ch == '_';
+    }
+
+    private String getCodeValue(String[] row, String columnName)
+    {
+        return asCode(getValue(row, columnName));
+    }
+
+    // ------------
+
+    public String getPlateCode(String[] row)
+    {
+        return getCodeValue(row, PLATE_NAME);
+    }
+
+    public String getWellCode(String[] row)
+    {
+        String wellRow = getValue(row, WELL_ROW);
+        String wellCol = getValue(row, WELL_COL);
+        return wellRow + wellCol;
+    }
+
+    public String getRNASequence(String[] row)
+    {
+        return getValue(row, RNA_SEQUENCE);
+    }
+
+    public String getOligoId(String[] row)
+    {
+        return getValue(row, OLIGO_ID);
+    }
+
+    public String getGeneId(String[] row)
+    {
+        return getValue(row, GENE_ID);
+    }
+
+    public String getGeneCode(String[] row)
+    {
+        return getCodeValue(row, GENE_SYMBOL);
+    }
+
+    public String getGeneDescription(String[] row)
+    {
+        return getValue(row, GENE_DESC);
+    }
+
+    /**
+     * @throws IllegalArgumentException if one of the names is not an additional column
+     */
+    public List<String> getAdditionalOligoPropertyValues(String[] row, List<String> columnNames)
+    {
+        List<String> values = new ArrayList<String>(columnNames.size());
+        for (String columnName : columnNames)
+        {
+            Integer ix = unknownColumnIndices.get(columnName);
+            if (ix == null)
+            {
+                // fail with a clear message instead of a NullPointerException on unboxing
+                throw new IllegalArgumentException("Unknown additional column: " + columnName);
+            }
+            values.add(valueAt(row, ix));
+        }
+        return values;
+    }
+}
diff --git a/screening/source/java/ch/systemsx/cisd/openbis/plugin/screening/transformers/ScreeningLibraryTransformer.java b/screening/source/java/ch/systemsx/cisd/openbis/plugin/screening/transformers/ScreeningLibraryTransformer.java
new file mode 100644
index 0000000000000000000000000000000000000000..1a22475d429af1a4557fdb144b1899c606c39bd3
--- /dev/null
+++ b/screening/source/java/ch/systemsx/cisd/openbis/plugin/screening/transformers/ScreeningLibraryTransformer.java
@@ -0,0 +1,112 @@
+/*
+ * Copyright 2010 ETH Zuerich, CISD
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package ch.systemsx.cisd.openbis.plugin.screening.transformers;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.nio.charset.Charset;
+
+import com.csvreader.CsvReader;
+
+/**
+ * Transforms a screening library file and produces files which can be uploaded to openBIS: genes,
+ * oligos and plates with wells.
+ *
+ * @author Tomasz Pylak
+ */
+public class ScreeningLibraryTransformer
+{
+    private final static char SEPARATOR = ',';
+
+    /**
+     * Expects four arguments: path of the library file, experiment identifier, plate geometry and
+     * group code. The results are written to the current working directory.
+     */
+    public static void main(String[] args) throws FileNotFoundException, IOException
+    {
+        if (args.length != 4)
+        {
+            error("Invalid parameters. Expected: "
+                    + "<master-plate-file-path> <experiment-identifier> <plate-geometry> <group>");
+        }
+        CsvReader csvReader = readFile(args[0]);
+        try
+        {
+            String experimentIdentifier = args[1];
+            String plateGeometry = args[2];
+            String groupCode = args[3];
+            readLibrary(csvReader, experimentIdentifier, plateGeometry, groupCode);
+        } finally
+        {
+            // release the input file also if the transformation fails
+            csvReader.close();
+        }
+    }
+
+    private static void readLibrary(CsvReader csvReader, String experimentIdentifier,
+            String plateGeometry, String groupCode) throws IOException
+    {
+        System.out.println("Processing...");
+        boolean headerPresent = csvReader.readRecord();
+        if (headerPresent == false)
+        {
+            error("header not found");
+            return;
+        }
+        String[] headers = csvReader.getValues();
+        IScreeningLibraryColumnExtractor extractor =
+                new QiagenScreeningLibraryColumnExtractor(headers);
+        LibraryEntityRegistrator registrator =
+                new LibraryEntityRegistrator(extractor, experimentIdentifier, plateGeometry,
+                        groupCode);
+        try
+        {
+            while (csvReader.readRecord())
+            {
+                registrator.register(extractor, csvReader.getValues());
+            }
+        } finally
+        {
+            // release the output files also if a row cannot be processed
+            registrator.close();
+        }
+        System.out.println("Done, look for results in " + new File(".").getAbsolutePath());
+    }
+
+    private static CsvReader readFile(String path) throws FileNotFoundException, IOException
+    {
+        File masterPlatesFile = new File(path);
+        if (masterPlatesFile.isFile() == false)
+        {
+            error(masterPlatesFile + " does not exist or is not a file.");
+        }
+        FileInputStream fileInputStream = new FileInputStream(masterPlatesFile);
+
+        CsvReader csvReader = new CsvReader(fileInputStream, Charset.defaultCharset());
+        csvReader.setDelimiter(SEPARATOR);
+        csvReader.setSafetySwitch(false);
+        return csvReader;
+    }
+
+    private static void error(String msg)
+    {
+        System.err.println(msg);
+        System.exit(1);
+    }
+}