From d818c852df2abfb38da3550c3d07e17fbc95b7bf Mon Sep 17 00:00:00 2001
From: brinn <brinn>
Date: Mon, 18 May 2009 06:39:58 +0000
Subject: [PATCH] [SE-112] add: some code to parse eicML files and write them
 into the database

SVN: 11069
---
 rtd_yeastx/.classpath                         |  13 ++
 rtd_yeastx/.project                           |  17 ++
 rtd_yeastx/{ => .settings}/.gitignore         |   0
 .../cisd/yeastx/eicml/ChromatogramDTO.java    |  52 +++++
 .../systemsx/cisd/yeastx/eicml/DBFactory.java |  50 +++++
 .../cisd/yeastx/eicml/EICML2Database.java     |  79 ++++++++
 .../cisd/yeastx/eicml/EICML2TabConverter.java | 162 ++++++++++++++++
 .../yeastx/eicml/EICMLFilenameFilter.java     |  33 ++++
 .../cisd/yeastx/eicml/EICMLParser.java        | 180 ++++++++++++++++++
 .../cisd/yeastx/eicml/FloatArrayMapper.java   |  75 ++++++++
 .../systemsx/cisd/yeastx/eicml/IMSRunDAO.java |  84 ++++++++
 .../yeastx/eicml/ListChromatogramLabels.java  |  75 ++++++++
 .../systemsx/cisd/yeastx/eicml/MSRunDTO.java  | 102 ++++++++++
 rtd_yeastx/source/sql/schema.sql              |  38 ++++
 .../ReadChromatogramsPerformanceTest.java     |  53 ++++++
 15 files changed, 1013 insertions(+)
 create mode 100644 rtd_yeastx/.classpath
 create mode 100644 rtd_yeastx/.project
 rename rtd_yeastx/{ => .settings}/.gitignore (100%)
 create mode 100644 rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/eicml/ChromatogramDTO.java
 create mode 100644 rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/eicml/DBFactory.java
 create mode 100644 rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/eicml/EICML2Database.java
 create mode 100644 rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/eicml/EICML2TabConverter.java
 create mode 100644 rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/eicml/EICMLFilenameFilter.java
 create mode 100644 rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/eicml/EICMLParser.java
 create mode 100644 rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/eicml/FloatArrayMapper.java
 create mode 100644 rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/eicml/IMSRunDAO.java
 create mode 100644 rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/eicml/ListChromatogramLabels.java
 create mode 100644 rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/eicml/MSRunDTO.java
 create mode 100644 rtd_yeastx/source/sql/schema.sql
 create mode 100644 rtd_yeastx/sourceTest/java/ch/systemsx/cisd/yeastx/eicml/ReadChromatogramsPerformanceTest.java

diff --git a/rtd_yeastx/.classpath b/rtd_yeastx/.classpath
new file mode 100644
index 00000000000..cca51acedab
--- /dev/null
+++ b/rtd_yeastx/.classpath
@@ -0,0 +1,13 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<classpath>
+	<classpathentry kind="src" path="source/java"/>
+	<classpathentry kind="src" path="sourceTest/java"/>
+	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.6"/>
+	<classpathentry kind="lib" path="/libraries/commons-codec/commons-codec.jar" sourcepath="/libraries/commons-codec/src.zip"/>
+	<classpathentry kind="lib" path="/libraries/commons-io/commons-io.jar" sourcepath="/libraries/commons-io/src.zip"/>
+	<classpathentry kind="lib" path="/libraries/commons-lang/commons-lang.jar" sourcepath="/libraries/commons-lang/src.zip"/>
+	<classpathentry kind="lib" path="/libraries/cisd-base/cisd-base.jar" sourcepath="/libraries/cisd-base/cisd-base-src.zip"/>
+	<classpathentry kind="lib" path="/libraries/postgresql/postgresql.jar" sourcepath="/libraries/postgresql/postgresql-src.zip"/>
+	<classpathentry kind="lib" path="/libraries/eodsql/eodsql.jar" sourcepath="/libraries/eodsql/eodsql_src.zip"/>
+	<classpathentry kind="output" path="targets/classes"/>
+</classpath>
diff --git a/rtd_yeastx/.project b/rtd_yeastx/.project
new file mode 100644
index 00000000000..a5a0d918eb0
--- /dev/null
+++ b/rtd_yeastx/.project
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+	<name>rtd_yeastx</name>
+	<comment></comment>
+	<projects>
+	</projects>
+	<buildSpec>
+		<buildCommand>
+			<name>org.eclipse.jdt.core.javabuilder</name>
+			<arguments>
+			</arguments>
+		</buildCommand>
+	</buildSpec>
+	<natures>
+		<nature>org.eclipse.jdt.core.javanature</nature>
+	</natures>
+</projectDescription>
diff --git a/rtd_yeastx/.gitignore b/rtd_yeastx/.settings/.gitignore
similarity index 100%
rename from rtd_yeastx/.gitignore
rename to rtd_yeastx/.settings/.gitignore
diff --git a/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/eicml/ChromatogramDTO.java b/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/eicml/ChromatogramDTO.java
new file mode 100644
index 00000000000..cecd4b786b8
--- /dev/null
+++ b/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/eicml/ChromatogramDTO.java
@@ -0,0 +1,52 @@
+/*
+ * Copyright 2009 ETH Zuerich, CISD
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package ch.systemsx.cisd.yeastx.eicml;
+
+import net.lemnik.eodsql.AutoGeneratedKeys;
+
+import org.apache.commons.lang.builder.ToStringBuilder;
+
+/**
+ * A class that represents a chromatogram in an eicML file.
+ */
+public class ChromatogramDTO
+{
+    @AutoGeneratedKeys
+    public long chromId;
+
+    public long msRunId;
+
+    public float q1Mz = Float.NaN;
+
+    public float q3LowMz = Float.NaN;
+
+    public float q3HighMz = Float.NaN;
+
+    public String label;
+
+    public char polarity = '?';
+
+    public float[] runTimes;
+
+    public float[] intensities;
+
+    @Override
+    public String toString()
+    {
+        return ToStringBuilder.reflectionToString(this);
+    }
+}
\ No newline at end of file
diff --git a/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/eicml/DBFactory.java b/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/eicml/DBFactory.java
new file mode 100644
index 00000000000..3118e909354
--- /dev/null
+++ b/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/eicml/DBFactory.java
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2009 ETH Zuerich, CISD
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package ch.systemsx.cisd.yeastx.eicml;
+
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.SQLException;
+
+import net.lemnik.eodsql.QueryTool;
+
+/**
+ * Factory for database objects.
+ *
+ * @author Bernd Rinn
+ */
+public class DBFactory
+{
+    static
+    {
+        QueryTool.getTypeMap().put(float[].class, new FloatArrayMapper());
+    }
+
+    public static Connection getConnection() throws SQLException
+    {
+        final Connection conn =
+            DriverManager.getConnection("jdbc:postgresql:metabol", System.getProperties()
+                    .getProperty("user.name"), "");
+        conn.setAutoCommit(false);
+        return conn;
+    }
+
+    public static IMSRunDAO getDAO(Connection conn)
+    {
+        return QueryTool.getQuery(conn, IMSRunDAO.class);
+    }
+}
diff --git a/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/eicml/EICML2Database.java b/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/eicml/EICML2Database.java
new file mode 100644
index 00000000000..89d66a975c1
--- /dev/null
+++ b/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/eicml/EICML2Database.java
@@ -0,0 +1,79 @@
+/*
+ * Copyright 2009 ETH Zuerich, CISD
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package ch.systemsx.cisd.yeastx.eicml;
+
+import java.io.File;
+import java.io.IOException;
+import java.sql.Connection;
+import java.sql.SQLException;
+
+import javax.xml.parsers.ParserConfigurationException;
+
+import org.xml.sax.SAXException;
+
+import ch.systemsx.cisd.yeastx.eicml.EICMLParser.IChromatogramObserver;
+import ch.systemsx.cisd.yeastx.eicml.EICMLParser.IMSRunObserver;
+
+/**
+ * Tool for uploading eicML files to the database.
+ * 
+ * @author Bernd Rinn
+ */
+public class EICML2Database
+{
+
+    public static void main(String[] args) throws ParserConfigurationException, SAXException,
+            IOException, SQLException
+    {
+        final Connection conn = DBFactory.getConnection();
+        try
+        {
+            final String dir = args[0];
+            int permId = 0;
+            for (String f : new File(dir).list(new EICMLFilenameFilter()))
+            {
+                final long[] id = new long[1];
+                try
+                {
+                    final IMSRunDAO dao = DBFactory.getDAO(conn);
+                    new EICMLParser(dir + "/" + f, Integer.toString(++permId), new IMSRunObserver()
+                        {
+                            public void observe(MSRunDTO run)
+                            {
+                                id[0] = dao.addMSRun(run);
+                            }
+                        }, new IChromatogramObserver()
+                        {
+                            public void observe(ChromatogramDTO chromatogram)
+                            {
+                                chromatogram.msRunId = id[0];
+                                dao.addChromatogram(chromatogram);
+                            }
+                        });
+                    conn.commit();
+                } catch (Throwable th)
+                {
+                    conn.rollback();
+                    th.printStackTrace();
+                }
+            }
+        } finally
+        {
+            conn.close();
+        }
+    }
+}
diff --git a/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/eicml/EICML2TabConverter.java b/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/eicml/EICML2TabConverter.java
new file mode 100644
index 00000000000..63ce5cce5c8
--- /dev/null
+++ b/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/eicml/EICML2TabConverter.java
@@ -0,0 +1,162 @@
+/*
+ * Copyright 2009 ETH Zuerich, CISD
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package ch.systemsx.cisd.yeastx.eicml;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.PrintStream;
+
+import javax.xml.parsers.ParserConfigurationException;
+
+import org.apache.commons.lang.StringUtils;
+import org.xml.sax.SAXException;
+
+/**
+ * A converter from eicML files to tab files.
+ * 
+ * @author Bernd Rinn
+ */
+public class EICML2TabConverter
+{
+
+    public static void main(String[] args) throws IOException, ParserConfigurationException,
+            SAXException
+    {
+        final String dir = args[0];
+        for (String f : new File(dir).list(new EICMLFilenameFilter()))
+        {
+            final String[] msRunName = new String[1];
+            new EICMLParser(dir + "/" + f, null, new EICMLParser.IMSRunObserver()
+                {
+                    public void observe(MSRunDTO run)
+                    {
+                        String runName = run.rawDataFileName;
+                        if (runName.endsWith(".RAW"))
+                        {
+                            runName = runName.substring(0, runName.length() - 4);
+                        }
+                        String runDir = dir + "/" + runName;
+                        new File(runDir).mkdir();
+                        msRunName[0] = runDir;
+                        PrintStream out;
+                        try
+                        {
+                            out =
+                                    new PrintStream(new File(runDir + "/msrun_" + runName
+                                            + ".tsv"));
+                            if (StringUtils.isNotBlank(run.rawDataFilePath))
+                            {
+                                out.println("filePath\t" + run.rawDataFilePath);
+                            }
+                            if (StringUtils.isNotBlank(run.rawDataFileName))
+                            {
+                                out.println("fileName\t" + run.rawDataFileName);
+                            }
+                            if (StringUtils.isNotBlank(run.instrumentType))
+                            {
+                                out.println("instrumentType\t" + run.instrumentType);
+                            }
+                            if (StringUtils.isNotBlank(run.instrumentManufacturer))
+                            {
+                                out.println("instrumentManufacturer\t"
+                                        + run.instrumentManufacturer);
+                            }
+                            if (StringUtils.isNotBlank(run.instrumentModel))
+                            {
+                                out.println("instrumentModel\t" + run.instrumentModel);
+                            }
+                            if (StringUtils.isNotBlank(run.methodIonisation))
+                            {
+                                out.println("methodIonisation\t" + run.methodIonisation);
+                            }
+                            if (StringUtils.isNotBlank(run.methodSeparation))
+                            {
+                                out.println("methodSeparation\t" + run.methodSeparation);
+                            }
+                            if (StringUtils.isNotBlank(run.acquisitionDate))
+                            {
+                                out.println("acquisitionDate\t" + run.acquisitionDate);
+                            }
+                            if (run.chromCount >= 0)
+                            {
+                                out.println("chromCount\t" + run.chromCount);
+                            }
+                            if (Float.isNaN(run.startTime) == false)
+                            {
+                                out.println("startTime\t" + run.startTime);
+                            }
+                            if (Float.isNaN(run.endTime) == false)
+                            {
+                                out.println("endTime\t" + run.endTime);
+                            }
+                            if (run.msRunId >= 0)
+                            {
+                                out.println("msRunId\t" + run.msRunId);
+                            }
+                            out.close();
+                        } catch (FileNotFoundException ex)
+                        {
+                            ex.printStackTrace();
+                        }
+                    }
+                }, new EICMLParser.IChromatogramObserver()
+                {
+                    public void observe(ChromatogramDTO chromatogram)
+                    {
+                        PrintStream out;
+                        try
+                        {
+                            out =
+                                    new PrintStream(new File(msRunName[0] + "/"
+                                            + chromatogram.label + ".tsv"));
+                            if (Float.isNaN(chromatogram.q1Mz) == false)
+                            {
+                                out.println("#\tQ1Mz\t" + chromatogram.q1Mz);
+                            }
+                            if (Float.isNaN(chromatogram.q3LowMz) == false)
+                            {
+                                out.println("#\tQ3LowMz\t" + chromatogram.q3LowMz);
+                            }
+                            if (Float.isNaN(chromatogram.q3HighMz) == false)
+                            {
+                                out.println("#\tQ3HighMz\t" + chromatogram.q3HighMz);
+                            }
+                            if (StringUtils.isNotBlank(chromatogram.label))
+                            {
+                                out.println("#\tLabel\t" + chromatogram.label);
+                            }
+                            if (chromatogram.polarity != '\0')
+                            {
+                                out.println("#\tPolarity\t" + chromatogram.polarity);
+                            }
+                            for (int i = 0; i < chromatogram.runTimes.length; ++i)
+                            {
+                                out.println(chromatogram.runTimes[i] + "\t"
+                                        + chromatogram.intensities[i]);
+                            }
+                            out.close();
+                        } catch (FileNotFoundException ex)
+                        {
+                            ex.printStackTrace();
+                        }
+                    }
+                });
+        }
+    }
+
+}
diff --git a/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/eicml/EICMLFilenameFilter.java b/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/eicml/EICMLFilenameFilter.java
new file mode 100644
index 00000000000..44940b080f6
--- /dev/null
+++ b/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/eicml/EICMLFilenameFilter.java
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2009 ETH Zuerich, CISD
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package ch.systemsx.cisd.yeastx.eicml;
+
+import java.io.File;
+import java.io.FilenameFilter;
+
+/**
+ * A {@link FilenameFilter} for <code>eicML</code> files.
+ *
+ * @author Bernd Rinn
+ */
+final class EICMLFilenameFilter implements FilenameFilter
+{
+    public boolean accept(File myDir, String name)
+    {
+        return name.endsWith(".eicML");
+    }
+}
\ No newline at end of file
diff --git a/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/eicml/EICMLParser.java b/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/eicml/EICMLParser.java
new file mode 100644
index 00000000000..e9e334bc00c
--- /dev/null
+++ b/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/eicml/EICMLParser.java
@@ -0,0 +1,180 @@
+/*
+ * Copyright 2009 ETH Zuerich, CISD
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package ch.systemsx.cisd.yeastx.eicml;
+
+import java.io.IOException;
+
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.SAXParser;
+import javax.xml.parsers.SAXParserFactory;
+
+
+import org.apache.commons.codec.binary.Base64;
+import org.xml.sax.Attributes;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+
+import ch.systemsx.cisd.base.convert.NativeData;
+import ch.systemsx.cisd.base.convert.NativeData.ByteOrder;
+
+
+/**
+ * A SAX-based parser for <code>eicML</code> files that reports runs and chromatograms to observers.
+ * 
+ * @author Bernd Rinn
+ */
+public class EICMLParser extends DefaultHandler
+{
+
+    /** A role that observes {@link MSRunDTO}s. */
+    public interface IMSRunObserver
+    {
+        void observe(MSRunDTO run);
+    }
+
+    /** A role that observes {@link ChromatogramDTO}s. */
+    public interface IChromatogramObserver
+    {
+        void observe(ChromatogramDTO chromatogram);
+    }
+
+    private StringBuilder buffer = new StringBuilder();
+
+    private String permIdOrNull;
+    
+    private MSRunDTO msRun;
+
+    private ChromatogramDTO chromatogram;
+
+    private boolean parsingMsRun;
+
+    private boolean parsingChromatogram;
+
+    private final IMSRunObserver msRunObserverOrNull;
+
+    private final IChromatogramObserver chromatogramObserverOrNull;
+
+    public EICMLParser(String fileName, String permIdOrNull, IMSRunObserver msRunObserverOrNull,
+            IChromatogramObserver chromatogramObserverOrNull) throws ParserConfigurationException,
+            SAXException, IOException
+    {
+        assert fileName != null;
+
+        this.permIdOrNull = permIdOrNull;
+        this.msRunObserverOrNull = msRunObserverOrNull;
+        this.chromatogramObserverOrNull = chromatogramObserverOrNull;
+        parseDocument(fileName);
+    }
+
+    private void parseDocument(String fileName) throws ParserConfigurationException, SAXException,
+            IOException
+    {
+        final SAXParserFactory spf = SAXParserFactory.newInstance();
+        final SAXParser sp = spf.newSAXParser();
+        sp.parse(fileName, this);
+    }
+
+    @Override
+    public void startElement(String uri, String localName, String name, Attributes attributes)
+            throws SAXException
+    {
+        buffer.setLength(0);
+        if ("msRun".equals(name))
+        {
+            msRun = new MSRunDTO();
+            msRun.permId = permIdOrNull;
+            parsingMsRun = true;
+        } else if ("chromatogram".equals(name))
+        {
+            chromatogram = new ChromatogramDTO();
+            parsingChromatogram = true;
+        }
+    }
+
+    private float[] convert(String b64)
+    {
+        final byte[] decoded = Base64.decodeBase64(b64.getBytes());
+        return NativeData.byteToFloat(decoded, ByteOrder.BIG_ENDIAN);
+    }
+
+    void set(String name, String value) throws SAXException
+    {
+        if ("Q1Mz".equals(name) && value.length() > 0)
+        {
+            chromatogram.q1Mz = Float.parseFloat(value);
+        } else if ("Q3LowMz".equals(name) && value.length() > 0)
+        {
+            chromatogram.q3LowMz = Float.parseFloat(value);
+        } else if ("Q3HighMz".equals(name) && value.length() > 0)
+        {
+            chromatogram.q3HighMz = Float.parseFloat(value);
+        } else if ("label".equals(name) && value.length() > 0)
+        {
+            chromatogram.label = value;
+        } else if ("polarity".equals(name) && value.length() > 0)
+        {
+            if (value.length() != 1)
+            {
+                throw new SAXException("Illegal polarity: must be of length 1");
+            }
+            chromatogram.polarity = value.charAt(0);
+        } else if ("RT".equals(name) && value.length() > 0)
+        {
+            chromatogram.runTimes = convert(value);
+        } else if ("INT".equals(name) && value.length() > 0)
+        {
+            chromatogram.intensities = convert(value);
+        }
+    }
+
+    @Override
+    public void endElement(String uri, String localName, String name) throws SAXException
+    {
+        if ("msRun".equals(name))
+        {
+            parsingMsRun = false;
+            if (msRunObserverOrNull != null)
+            {
+                msRunObserverOrNull.observe(msRun);
+            }
+            msRun = null;
+        } else if ("chromatogram".equals(name))
+        {
+            parsingChromatogram = false;
+            if (chromatogramObserverOrNull != null)
+            {
+                chromatogramObserverOrNull.observe(chromatogram);
+            }
+            chromatogram = null;
+        }
+        if (parsingMsRun && msRun != null)
+        {
+            msRun.set(name, buffer.toString());
+        } else if (parsingChromatogram && chromatogram != null)
+        {
+            set(name, buffer.toString());
+        }
+        buffer.setLength(0);
+    }
+
+    @Override
+    public void characters(char[] ch, int start, int length) throws SAXException
+    {
+        buffer.append(ch, start, length); // append the slice directly, no temporary String
+    }
+
+}
diff --git a/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/eicml/FloatArrayMapper.java b/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/eicml/FloatArrayMapper.java
new file mode 100644
index 00000000000..3d9ccffc22e
--- /dev/null
+++ b/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/eicml/FloatArrayMapper.java
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2009 ETH Zuerich, CISD
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package ch.systemsx.cisd.yeastx.eicml;
+
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.sql.Types;
+
+import net.lemnik.eodsql.TypeMapper;
+
+import org.apache.commons.lang.StringUtils;
+
+/**
+ * {@link TypeMapper} storing <code>float[]</code> as comma-separated text (NULL maps to null).
+ * 
+ * @author Bernd Rinn
+ */
+class FloatArrayMapper implements TypeMapper<float[]>
+{
+    public float[] get(ResultSet results, int column) throws SQLException
+    {
+        final String[] floatStr = StringUtils.split(results.getString(column), ',');
+        if (floatStr == null) // split() returns null for null input, i.e. a SQL NULL column
+        {
+            return null;
+        }
+        final float[] floatArr = new float[floatStr.length];
+        for (int i = 0; i < floatStr.length; ++i)
+        {
+            floatArr[i] = Float.parseFloat(floatStr[i]);
+        }
+        return floatArr;
+    }
+
+    public void set(ResultSet results, int column, float[] obj) throws SQLException
+    {
+        results.updateString(column, toString(obj));
+    }
+
+    public void set(PreparedStatement statement, int column, float[] obj) throws SQLException
+    {
+        if (obj == null)
+        {
+            statement.setNull(column, Types.VARCHAR);
+            return;
+        }
+        statement.setString(column, toString(obj));
+    }
+
+    private String toString(float[] array)
+    {
+        final StringBuilder b = new StringBuilder();
+        for (float v : array)
+        {
+            b.append(v).append(',');
+        }
+        b.setLength(Math.max(0, b.length() - 1)); // empty array: nothing to trim, yield ""
+        return b.toString();
+    }
+}
\ No newline at end of file
diff --git a/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/eicml/IMSRunDAO.java b/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/eicml/IMSRunDAO.java
new file mode 100644
index 00000000000..a1840e54ea2
--- /dev/null
+++ b/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/eicml/IMSRunDAO.java
@@ -0,0 +1,84 @@
+/*
+ * Copyright 2009 ETH Zuerich, CISD
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package ch.systemsx.cisd.yeastx.eicml;
+
+import net.lemnik.eodsql.BaseQuery;
+import net.lemnik.eodsql.DataIterator;
+import net.lemnik.eodsql.Select;
+
+/**
+ * Interface for querying / updating the metabol database.
+ * 
+ * @author Bernd Rinn
+ */
+public interface IMSRunDAO extends BaseQuery
+{
+    final String ALL_MSRUN_COLUMNS =
+            "msruns.msRunId, msruns.permId, msruns.rawDataFileName, msruns.rawDataFilePath, "
+                    + "msruns.acquisitionDate, msruns.instrumentType, msruns.instrumentManufacturer, "
+                    + "msruns.instrumentModel, msruns.methodIonisation, msruns.methodSeparation, "
+                    + "msruns.startTime, msruns.endTime";
+
+    @Select("INSERT INTO msruns (permId, rawDataFileName, rawDataFilePath, acquisitionDate, "
+            + "instrumentType, instrumentManufacturer, instrumentModel, methodIonisation, "
+            + "methodSeparation, startTime, endTime) values (?{1.permId}, ?{1.rawDataFileName}, "
+            + "?{1.rawDataFilePath}, ?{1.acquisitionDate}, ?{1.instrumentType}, "
+            + "?{1.instrumentManufacturer}, ?{1.instrumentModel}, ?{1.methodIonisation}, "
+            + "?{1.methodSeparation}, ?{1.startTime}, ?{1.endTime}) returning msRunId")
+    public long addMSRun(MSRunDTO msRun);
+
+    @Select("INSERT INTO chromatograms (msRunId, Q1MZ, Q3LowMz, Q3HighMz, label, polarity, runTimes, "
+            + "intensities) values (?{1.msRunId}, ?{1.q1Mz}, "
+            + "?{1.q3LowMz}, ?{1.q3HighMz}, ?{1.label}, "
+            + "?{1.polarity}, ?{1.runTimes}, ?{1.intensities}) returning chromId")
+    public long addChromatogram(ChromatogramDTO chromatogram);
+
+    @Select(sql = "SELECT * from msruns", rubberstamp = true)
+    public DataIterator<MSRunDTO> getMSRuns();
+
+    @Select(sql = "SELECT * from msruns where rawDataFileName=?{1}", rubberstamp = true)
+    public DataIterator<MSRunDTO> getMSRunsForRawDataFile(String rawDataFileName);
+
+    @Select("SELECT msruns.*, count(chromatograms.*) AS chromCount from msruns "
+            + "LEFT JOIN chromatograms USING(msRunId) where msruns.msRunId=?{1} GROUP BY "
+            + ALL_MSRUN_COLUMNS)
+    public MSRunDTO getMSRunById(long id);
+
+    @Select("SELECT msruns.*, count(chromatograms.*) AS chromCount from msruns "
+            + "LEFT JOIN chromatograms USING(msRunId) where msruns.permId=?{1} GROUP BY "
+            + ALL_MSRUN_COLUMNS)
+    public MSRunDTO getMSRunByPermId(String permId);
+
+    @Select("SELECT msruns.* FROM msruns LEFT JOIN chromatograms USING(msRunId) "
+            + "where chromatograms.chromId = ?{1.chromId}")
+    public MSRunDTO getMSRunForChromatogram(ChromatogramDTO chromatogram); // looked up by chromId
+
+    @Select("SELECT chromatograms.* FROM chromatograms where chromId=?{1}")
+    public ChromatogramDTO getChromatogramById(long id);
+    
+    @Select("SELECT chromatograms.* FROM chromatograms where label=?{1}")
+    public ChromatogramDTO getChromatogramByLabel(String label);
+
+    @Select(sql = "SELECT chromatograms.* FROM chromatograms LEFT JOIN msruns USING(msRunId) "
+            + "where msRunId=?{1.msRunId}", rubberstamp = true)
+    public DataIterator<ChromatogramDTO> getChromatogramsForRun(MSRunDTO msRun);
+
+    @Select(sql = "SELECT chromId, msRunId, Q1Mz, Q3LowMz, Q3HighMz, label, polarity FROM chromatograms "
+            + "LEFT JOIN msruns USING(msRunId) " + "where msRunId=?{1.msRunId}", rubberstamp = true)
+    public DataIterator<ChromatogramDTO> getChromatogramsForRunNoData(MSRunDTO msRun);
+
+}
diff --git a/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/eicml/ListChromatogramLabels.java b/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/eicml/ListChromatogramLabels.java
new file mode 100644
index 00000000000..583b9deb56c
--- /dev/null
+++ b/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/eicml/ListChromatogramLabels.java
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2009 ETH Zuerich, CISD
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package ch.systemsx.cisd.yeastx.eicml;
+
+import java.sql.Connection;
+import java.sql.SQLException;
+
+import net.lemnik.eodsql.DataIterator;
+
+/**
+ * A command line tool that lists the chromatogram labels of all runs or of selected runs.
+ *
+ * @author Bernd Rinn
+ */
+public class ListChromatogramLabels
+{
+    /** File name suffix of raw data files; appended to arguments, stripped for the output. */
+    private static final String RAW_SUFFIX = ".RAW";
+
+    /**
+     * Prints run name and chromatogram label, tab-separated, one chromatogram per line: for the
+     * runs of the raw data files named in <var>args</var>, or for all runs if there are no args.
+     */
+    public static void main(String[] args) throws SQLException
+    {
+        final Connection connection = DBFactory.getConnection();
+        try
+        {
+            final IMSRunDAO dao = DBFactory.getDAO(connection);
+            if (args.length == 0)
+            {
+                listChromatogramsForRuns(dao, dao.getMSRuns());
+                return;
+            }
+            for (String arg : args)
+            {
+                final String rawFile = arg.endsWith(RAW_SUFFIX) ? arg : arg + RAW_SUFFIX;
+                listChromatogramsForRuns(dao, dao.getMSRunsForRawDataFile(rawFile));
+            }
+        } finally
+        {
+            connection.close();
+        }
+    }
+
+    /** Writes one line "run name, tab, label" to stdout per chromatogram of the given runs. */
+    private static void listChromatogramsForRuns(final IMSRunDAO dao, DataIterator<MSRunDTO> runs)
+    {
+        for (MSRunDTO run : runs)
+        {
+            final String fileName = run.rawDataFileName;
+            final String runName = fileName.endsWith(RAW_SUFFIX)
+                    ? fileName.substring(0, fileName.length() - RAW_SUFFIX.length()) : fileName;
+            for (ChromatogramDTO chromatogram : dao.getChromatogramsForRunNoData(run))
+            {
+                System.out.println(runName + "\t" + chromatogram.label);
+            }
+        }
+    }
+
+}
diff --git a/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/eicml/MSRunDTO.java b/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/eicml/MSRunDTO.java
new file mode 100644
index 00000000000..24dd268e69d
--- /dev/null
+++ b/rtd_yeastx/source/java/ch/systemsx/cisd/yeastx/eicml/MSRunDTO.java
@@ -0,0 +1,102 @@
+/*
+ * Copyright 2009 ETH Zuerich, CISD
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package ch.systemsx.cisd.yeastx.eicml;
+
+import net.lemnik.eodsql.AutoGeneratedKeys;
+
+import org.apache.commons.lang.builder.ToStringBuilder;
+import org.xml.sax.SAXException;
+
+/**
+ * A class that represents a MSRun in an eicML file.
+ */
+public class MSRunDTO
+{
+    @AutoGeneratedKeys
+    public long msRunId;
+
+    public String permId;
+
+    // Text attributes; set() stores the given value unchanged.
+    public String rawDataFilePath;
+    public String rawDataFileName;
+    public String instrumentType;
+    public String instrumentManufacturer;
+    public String instrumentModel;
+    public String methodIonisation;
+    public String methodSeparation;
+    public String acquisitionDate;
+
+    // Numeric attributes; set() leaves these initial values in place when given an empty string.
+    public int chromCount = -1;
+    public float startTime = Float.NaN;
+    public float endTime = Float.NaN;
+
+    /**
+     * Stores <var>value</var> in the field selected by <var>name</var> ("filePath" and "fileName"
+     * select the raw data file fields, every other name equals its field name). Unknown names
+     * and empty values for numeric fields are ignored; <code>permId</code> cannot be set here.
+     * NOTE(review): a malformed number raises NumberFormatException; no SAXException is thrown.
+     */
+    void set(String name, String value) throws SAXException
+    {
+        if ("msRunId".equals(name) && value.length() > 0)
+        {
+            msRunId = Long.parseLong(value);
+        } else if ("chromCount".equals(name) && value.length() > 0)
+        {
+            chromCount = Integer.parseInt(value);
+        } else if ("startTime".equals(name) && value.length() > 0)
+        {
+            startTime = Float.parseFloat(value);
+        } else if ("endTime".equals(name) && value.length() > 0)
+        {
+            endTime = Float.parseFloat(value);
+        } else if ("filePath".equals(name))
+        {
+            rawDataFilePath = value;
+        } else if ("fileName".equals(name))
+        {
+            rawDataFileName = value;
+        } else if ("instrumentType".equals(name))
+        {
+            instrumentType = value;
+        } else if ("instrumentManufacturer".equals(name))
+        {
+            instrumentManufacturer = value;
+        } else if ("instrumentModel".equals(name))
+        {
+            instrumentModel = value;
+        } else if ("methodIonisation".equals(name))
+        {
+            methodIonisation = value;
+        } else if ("methodSeparation".equals(name))
+        {
+            methodSeparation = value;
+        } else if ("acquisitionDate".equals(name))
+        {
+            acquisitionDate = value;
+        }
+    }
+
+    /** Returns a string listing this object's fields, built reflectively by ToStringBuilder. */
+    @Override
+    public String toString()
+    {
+        return ToStringBuilder.reflectionToString(this);
+    }
+}
\ No newline at end of file
diff --git a/rtd_yeastx/source/sql/schema.sql b/rtd_yeastx/source/sql/schema.sql
new file mode 100644
index 00000000000..130a3cc3653
--- /dev/null
+++ b/rtd_yeastx/source/sql/schema.sql
@@ -0,0 +1,38 @@
+CREATE DATABASE metabol OWNER BRINN ENCODING 'UTF8';
+-- NOTE(review): the role 'brinn' is hard-coded as owner above and in SET ROLE below.
+\c metabol
+-- \c above is a psql meta-command, so this script has to be run with psql.
+SET ROLE 'brinn';
+
+CREATE TABLE msruns ( -- one row per MS run; read into MSRunDTO by IMSRunDAO
+  msRunId BIGSERIAL NOT NULL, -- generated key
+  permId VARCHAR(40), -- optional, but must be unique when present (see UNIQUE below)
+  rawDataFileName VARCHAR(255) NOT NULL,
+  rawDataFilePath VARCHAR(1000) DEFAULT NULL,
+  acquisitionDate VARCHAR(30) DEFAULT NULL, -- kept as text, not as a date type
+  instrumentType VARCHAR(20) DEFAULT NULL,
+  instrumentManufacturer VARCHAR(50) DEFAULT NULL,
+  instrumentModel VARCHAR(100) DEFAULT NULL,
+  methodIonisation VARCHAR(10) DEFAULT NULL,
+  methodSeparation VARCHAR(100) DEFAULT NULL,
+  startTime REAL NOT NULL,
+  endTime REAL NOT NULL,
+  PRIMARY KEY (msRunId),
+  UNIQUE(permId)
+);
+
+CREATE TABLE chromatograms ( -- one row per chromatogram; read into ChromatogramDTO by IMSRunDAO
+  chromId BIGSERIAL NOT NULL, -- generated key
+  msRunId BIGINT NOT NULL, -- owning run, see the foreign key below
+  Q1Mz REAL NOT NULL,
+  Q3LowMz REAL NOT NULL,
+  Q3HighMz REAL NOT NULL,
+  label VARCHAR(100) DEFAULT NULL, -- not unique
+  polarity VARCHAR(1) DEFAULT NULL,
+  runTimes TEXT NOT NULL, -- NOTE(review): presumably serialized float values (cf. FloatArrayMapper); confirm
+  intensities TEXT NOT NULL, -- NOTE(review): presumably serialized like runTimes; confirm
+  PRIMARY KEY (chromId),
+  CONSTRAINT FK_chromatogram_1 FOREIGN KEY (msRunId) REFERENCES msruns (msRunId) ON DELETE CASCADE ON UPDATE CASCADE -- deleting a run deletes its chromatograms
+);
+
+CREATE INDEX chromatogram_fk on chromatograms(msRunId); -- index on the column used by the per-run queries
\ No newline at end of file
diff --git a/rtd_yeastx/sourceTest/java/ch/systemsx/cisd/yeastx/eicml/ReadChromatogramsPerformanceTest.java b/rtd_yeastx/sourceTest/java/ch/systemsx/cisd/yeastx/eicml/ReadChromatogramsPerformanceTest.java
new file mode 100644
index 00000000000..017b58885da
--- /dev/null
+++ b/rtd_yeastx/sourceTest/java/ch/systemsx/cisd/yeastx/eicml/ReadChromatogramsPerformanceTest.java
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2009 ETH Zuerich, CISD
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package ch.systemsx.cisd.yeastx.eicml;
+
+import java.sql.Connection;
+import java.sql.SQLException;
+
+/**
+ * A performance test of reading all chromatograms from the database.
+ *
+ * @author Bernd Rinn
+ */
+public class ReadChromatogramsPerformanceTest
+{
+    /** Reads every chromatogram of every run and prints the elapsed time in seconds. */
+    public static void main(String[] args) throws SQLException
+    {
+        final long startMillis = System.currentTimeMillis();
+        final Connection connection = DBFactory.getConnection();
+        try
+        {
+            final IMSRunDAO dao = DBFactory.getDAO(connection);
+            for (MSRunDTO msRun : dao.getMSRuns())
+            {
+                // Iterate over the chromatograms so that they are really read.
+                for (@SuppressWarnings("unused")
+                ChromatogramDTO ignored : dao.getChromatogramsForRun(msRun))
+                {
+                    // Deliberately empty.
+                }
+            }
+        } finally
+        {
+            connection.close();
+        }
+        final long elapsedMillis = System.currentTimeMillis() - startMillis;
+        System.out.println(elapsedMillis / 1000.0f);
+    }
+}
-- 
GitLab