diff --git a/datastore_server/source/java/ch/systemsx/cisd/etlserver/plugins/BlastDatabaseCreationMaintenanceTask.java b/datastore_server/source/java/ch/systemsx/cisd/etlserver/plugins/BlastDatabaseCreationMaintenanceTask.java new file mode 100644 index 0000000000000000000000000000000000000000..5a3329ebe48ce98d5c8b10faca63363cb53fbc99 --- /dev/null +++ b/datastore_server/source/java/ch/systemsx/cisd/etlserver/plugins/BlastDatabaseCreationMaintenanceTask.java @@ -0,0 +1,318 @@ +/* + * Copyright 2014 ETH Zuerich, SIS + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package ch.systemsx.cisd.etlserver.plugins; + +import java.io.BufferedInputStream; +import java.io.BufferedReader; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.text.MessageFormat; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; +import java.util.Properties; + +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.FilenameUtils; +import org.apache.commons.io.IOUtils; +import org.apache.log4j.Logger; + +import ch.systemsx.cisd.common.exceptions.ConfigurationFailureException; +import ch.systemsx.cisd.common.exceptions.EnvironmentFailureException; +import ch.systemsx.cisd.common.fasta.SequenceType; +import ch.systemsx.cisd.common.filesystem.FileUtilities; +import ch.systemsx.cisd.common.logging.LogCategory; +import ch.systemsx.cisd.common.logging.LogFactory; +import ch.systemsx.cisd.common.maintenance.IMaintenanceTask; +import ch.systemsx.cisd.common.process.ProcessExecutionHelper; +import ch.systemsx.cisd.common.process.ProcessResult; +import ch.systemsx.cisd.openbis.common.io.hierarchical_content.api.IHierarchicalContent; +import ch.systemsx.cisd.openbis.common.io.hierarchical_content.api.IHierarchicalContentNode; +import ch.systemsx.cisd.openbis.dss.generic.shared.IConfigProvider; +import ch.systemsx.cisd.openbis.dss.generic.shared.IEncapsulatedOpenBISService; +import ch.systemsx.cisd.openbis.dss.generic.shared.IHierarchicalContentProvider; +import ch.systemsx.cisd.openbis.dss.generic.shared.ServiceProvider; +import ch.systemsx.cisd.openbis.generic.shared.basic.dto.AbstractExternalData; +import ch.systemsx.cisd.openbis.generic.shared.basic.dto.PhysicalDataSet; +import ch.systemsx.cisd.openbis.generic.shared.basic.dto.TrackingDataSetCriteria; + +/** + * This maintenance task creates a BLAST database for all files + * + * @author Franz-Josef Elmer + */ +public class BlastDatabaseCreationMaintenanceTask implements IMaintenanceTask +{ + static final String BLAST_TOOLS_DIRECTORY_PROPERTY = "blast-tools-directory"; + static final String BLAST_DATABASES_FOLDER_PROPERTY = "blast-databases-folder"; + static final String LAST_SEEN_DATA_SET_FILE_PROPERTY = "last-seen-data-set-file"; + static final String FILE_TYPES_PROPERTY = "file-types"; + + private static final String DEFAULT_LAST_SEEN_DATA_SET_FILE = "last-seen-data-set-for-BLAST-database-creation"; + private static final String DEFAULT_FILE_TYPES = ".fasta .fa .fsa"; + private static final String DEFAULT_BLAST_DATABASES_FOLDER = "blast-databases"; + + private static final Logger operationLog = + LogFactory.getLogger(LogCategory.OPERATION, BlastDatabaseCreationMaintenanceTask.class); + private static final Logger machineLog = + LogFactory.getLogger(LogCategory.MACHINE, BlastDatabaseCreationMaintenanceTask.class); + + private File lastSeenDataSetFile; + + private List<String> fileTypes; + private File blastDatabasesFolder; + private File tmpFolder; + private String makeblastdb; + private String makembindex; + + @Override + public void setUp(String pluginName, Properties properties) + { + fileTypes = Arrays.asList(properties.getProperty(FILE_TYPES_PROPERTY, DEFAULT_FILE_TYPES).split(" +")); + operationLog.info("File types: " + fileTypes); + lastSeenDataSetFile = getFile(properties, LAST_SEEN_DATA_SET_FILE_PROPERTY, DEFAULT_LAST_SEEN_DATA_SET_FILE); + blastDatabasesFolder = getFile(properties, BLAST_DATABASES_FOLDER_PROPERTY, DEFAULT_BLAST_DATABASES_FOLDER); + operationLog.info("BLAST databases folder: " + blastDatabasesFolder); + tmpFolder = new File(blastDatabasesFolder, "tmp"); + FileUtilities.deleteRecursively(tmpFolder); + if (tmpFolder.mkdirs() == false) + { + throw new ConfigurationFailureException("Couldn't create folder '" + tmpFolder + "'."); + } + String blastToolDirectory = getBLASTToolDirectory(properties); + makeblastdb = blastToolDirectory + "makeblastdb"; + if (process(makeblastdb, "-version") == false) + { + operationLog.error("BLAST isn't installed or property '" + BLAST_TOOLS_DIRECTORY_PROPERTY + + "' hasn't been correctly specified."); + } + makembindex = blastToolDirectory + "makembindex"; + + } + + private File getFile(Properties properties, String pathProperty, String defaultPath) + { + String path = properties.getProperty(pathProperty); + return path == null ? new File(getConfigProvider().getStoreRoot(), defaultPath) : new File(path); + } + + private String getBLASTToolDirectory(Properties properties) + { + String blastToolsDirectory = properties.getProperty(BLAST_TOOLS_DIRECTORY_PROPERTY, ""); + if (blastToolsDirectory.endsWith("/") || blastToolsDirectory.isEmpty()) + { + return blastToolsDirectory; + } + return blastToolsDirectory + "/"; + } + + @Override + public void execute() + { + IHierarchicalContentProvider contentProvider = getContentProvider(); + IEncapsulatedOpenBISService service = getOpenBISService(); + List<AbstractExternalData> dataSets = getDataSets(service); + if (dataSets.isEmpty() == false) + { + operationLog.info("Scan " + dataSets.size() + " data sets for creating BLAST databases."); + } + for (AbstractExternalData dataSet : dataSets) + { + if (dataSet.tryGetAsDataSet() != null && dataSet.isAvailable()) + { + try + { + createBlastDatabase(dataSet, contentProvider); + } catch (Exception ex) + { + operationLog.error("Error caused by creating BLAST database for data set " + dataSet.getCode() + + ": " + ex.getMessage(), ex); + } + } + updateLastSeenEventId(dataSet.getId()); + } + } + + private void createBlastDatabase(AbstractExternalData dataSet, IHierarchicalContentProvider contentProvider) + { + String dataSetCode = dataSet.getCode(); + FastaFileBuilder builder = new FastaFileBuilder(tmpFolder, dataSetCode); + IHierarchicalContent content = contentProvider.asContent(dataSet); + IHierarchicalContentNode rootNode = content.getRootNode(); + handle(rootNode, builder); + builder.finish(); + SequenceType[] values = SequenceType.values(); + for (SequenceType sequenceType : values) + { + File fastaFile = builder.getTemporaryFastaFileOrNull(sequenceType); + if (fastaFile == null) + { + continue; + } + String fastaFilePath = fastaFile.getAbsolutePath(); + String databaseName = FilenameUtils.removeExtension(fastaFile.getName()); + String databaseFile = new File(blastDatabasesFolder, databaseName).getAbsolutePath(); + String dbtype = sequenceType.toString().toLowerCase(); + boolean success = process(makeblastdb, "-in", fastaFilePath, "-dbtype", dbtype, + "-title", databaseName, "-out", databaseFile); + if (success == false) + { + break; + } + File databaseSeqFile = new File(databaseFile + ".nsq"); + if (databaseSeqFile.exists() && databaseSeqFile.length() > 1000000) + { + process(makembindex, "-iformat", "blastdb", "-input", databaseFile, "-old_style_index", "false"); + } + File allDatabaseFile = new File(blastDatabasesFolder, "all-" + dbtype + ".nal"); + if (allDatabaseFile.exists() == false) + { + FileUtilities.writeToFile(allDatabaseFile, "TITLE all-" + dbtype + "\nDBLIST"); + } + FileUtilities.appendToFile(allDatabaseFile, " " + databaseName, false); + } + builder.cleanUp(); + } + + private boolean process(String... command) + { + return process(Arrays.asList(command)); + } + + private boolean process(List<String> command) + { + ProcessResult processResult = ProcessExecutionHelper.run(command, operationLog, machineLog); + if (processResult.isOK()) + { + processResult.logAsInfo(); + } + return processResult.isOK(); + } + + private void handle(IHierarchicalContentNode node, FastaFileBuilder builder) + { + if (node.isDirectory()) + { + for (IHierarchicalContentNode childNode : node.getChildNodes()) + { + handle(childNode, builder); + } + } else + { + String nodeName = node.getName(); + for (String fileType : fileTypes) + { + if (nodeName.endsWith(fileType)) + { + appendTo(builder, node); + break; + } + } + } + } + + private void appendTo(FastaFileBuilder builder, IHierarchicalContentNode node) + { + InputStream inputStream = node.getInputStream(); + BufferedReader bufferedReader = null; + String relativePath = node.getRelativePath(); + builder.setFilePath(relativePath); + try + { + bufferedReader = new BufferedReader(new InputStreamReader(inputStream)); + String line; + while ((line = bufferedReader.readLine()) != null) + { + builder.handle(line); + } + } catch (IOException e) + { + throw new EnvironmentFailureException("Error while reading data from '" + relativePath + + "': " + e.getMessage(), e); + } finally + { + IOUtils.closeQuietly(bufferedReader); + } + } + + private List<AbstractExternalData> getDataSets(IEncapsulatedOpenBISService service) + { + Long lastSeenEventId = getLastSeenEventId(); + if (lastSeenEventId == null) + { + lastSeenEventId = 0L; + } + TrackingDataSetCriteria criteria = new TrackingDataSetCriteria(lastSeenEventId); + List<AbstractExternalData> dataSets = service.listNewerDataSets(criteria); + Collections.sort(dataSets, new Comparator<AbstractExternalData>() + { + @Override + public int compare(AbstractExternalData d0, AbstractExternalData d1) + { + long id0 = d0.getId(); + long id1 = d1.getId(); + return id0 > id1 ? 1 : (id0 < id1 ? -1 : 0); + } + }); + return dataSets; + } + + private Long getLastSeenEventId() + { + Long result = null; + if (lastSeenDataSetFile.exists()) + { + try + { + result = Long.parseLong(FileUtilities.loadToString(lastSeenDataSetFile).trim()); + } catch (Exception ex) + { + if (operationLog.isDebugEnabled()) + { + operationLog.debug("Cannot load last seen event id from file :" + + lastSeenDataSetFile, ex); + } + } + } + return result; + } + + private void updateLastSeenEventId(Long eventId) + { + FileUtilities.writeToFile(lastSeenDataSetFile, String.valueOf(eventId) + "\n"); + } + + IConfigProvider getConfigProvider() + { + return ServiceProvider.getConfigProvider(); + } + + IEncapsulatedOpenBISService getOpenBISService() + { + return ServiceProvider.getOpenBISService(); + } + + IHierarchicalContentProvider getContentProvider() + { + return ServiceProvider.getHierarchicalContentProvider(); + } + +} diff --git a/datastore_server/source/java/ch/systemsx/cisd/etlserver/plugins/FastaFileBuilder.java b/datastore_server/source/java/ch/systemsx/cisd/etlserver/plugins/FastaFileBuilder.java new file mode 100644 index 0000000000000000000000000000000000000000..5127c4ee84766e35379c42b8ba762ec506b4c7c0 --- /dev/null +++ b/datastore_server/source/java/ch/systemsx/cisd/etlserver/plugins/FastaFileBuilder.java @@ -0,0 +1,216 @@ +/* + * Copyright 2014 ETH Zuerich, SIS + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package ch.systemsx.cisd.etlserver.plugins; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.io.PrintWriter; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import ch.systemsx.cisd.common.exceptions.EnvironmentFailureException; +import ch.systemsx.cisd.common.fasta.FastaUtilities; +import ch.systemsx.cisd.common.fasta.SequenceType; +import ch.systemsx.cisd.common.filesystem.FileUtilities; +import ch.systemsx.cisd.common.string.Template; + +/** + * Helper class to create temporary FASTA files. + * + * @author Franz-Josef Elmer + */ +class FastaFileBuilder +{ + private static final class FastaEntry + { + private List<String> lines = new ArrayList<String>(); + private SequenceType seqType; + + FastaEntry(String id) + { + lines.add(">" + id); + } + + void setSeqType(SequenceType seqType) + { + this.seqType = seqType; + } + + SequenceType getSeqType() + { + return seqType; + } + + void appendSeq(String seq) + { + lines.add(seq); + } + + List<String> getLines() + { + return lines; + } + } + + private enum EntryType { FASTA, FASTQ } + + private static final Template ID_EXTENSION_TEMPLATE = new Template("[Data set: ${data_set}, File: ${file}]"); + + private final String dataSetCode; + private final File tempFolder; + + private final Map<SequenceType, PrintWriter> writers = new HashMap<SequenceType, PrintWriter>(); + private String idExtension; + private FastaEntry currentFastaEntry; + + private EntryType currentEntryType; + + FastaFileBuilder(File tempFolder, String dataSetCode) + { + this.tempFolder = tempFolder; + this.dataSetCode = dataSetCode; + } + + void setFilePath(String filePath) + { + writeFastaEntry(); + Template template = ID_EXTENSION_TEMPLATE.createFreshCopy(); + template.bind("data_set", dataSetCode); + template.bind("file", filePath); + idExtension = template.createText(); + } + + void handle(String line) + { + EntryType entryType = tryToGetEntryType(line); + if (entryType != null) + { + writeFastaEntry(); + if (idExtension == null) + { + throw new IllegalStateException("File path not set [Data Set: " + dataSetCode + "]."); + } + currentFastaEntry = new FastaEntry(line.substring(1) + " " + idExtension); + currentEntryType = entryType; + } else + { + if (currentFastaEntry == null) + { + throw new IllegalStateException("Invalid line " + idExtension + ". Line with identifier expected: " + line); + } + if (currentFastaEntry.getSeqType() == null) + { + currentFastaEntry.setSeqType(FastaUtilities.determineSequenceType(line)); + currentFastaEntry.appendSeq(line); + } else if (currentEntryType == EntryType.FASTA) + { + currentFastaEntry.appendSeq(line); + } + } + } + + void finish() + { + writeFastaEntry(); + for (PrintWriter printWriter : writers.values()) + { + printWriter.close(); + } + } + + File getTemporaryNuclFastaFileOrNull() + { + return getTemporaryFastaFileOrNull(SequenceType.NUCL); + } + + File getTemporaryProtFastaFileOrNull() + { + return getTemporaryFastaFileOrNull(SequenceType.PROT); + } + + File getTemporaryFastaFileOrNull(SequenceType seqType) + { + return writers.containsKey(seqType) ? getFastaFile(seqType) : null; + } + + void cleanUp() + { + SequenceType[] values = SequenceType.values(); + for (SequenceType sequenceType : values) + { + File file = getTemporaryFastaFileOrNull(sequenceType); + if (file != null) + { + FileUtilities.delete(file); + } + } + } + + private void writeFastaEntry() + { + if (currentFastaEntry == null) + { + return; + } + SequenceType seqType = currentFastaEntry.getSeqType(); + List<String> lines = currentFastaEntry.getLines(); + if (seqType == null) + { + throw new IllegalStateException("Unknown type of the following FASTA entry: " + lines); + } + PrintWriter printer = getPrinter(seqType); + for (String line : lines) + { + printer.println(line); + } + currentFastaEntry = null; + } + + private PrintWriter getPrinter(SequenceType seqType) + { + PrintWriter printWriter = writers.get(seqType); + if (printWriter == null) + { + File fastaFile = getFastaFile(seqType); + try + { + printWriter = new PrintWriter(new BufferedWriter(new FileWriter(fastaFile))); + writers.put(seqType, printWriter); + } catch (IOException ex) + { + throw new EnvironmentFailureException("Couldn't create temporary FASTA file '" + fastaFile + + "': " + ex.getMessage()); + } + } + return printWriter; + } + + private File getFastaFile(SequenceType seqType) + { + return new File(tempFolder, dataSetCode + "-" + seqType.toString().toLowerCase() + ".fa"); + } + + private EntryType tryToGetEntryType(String line) + { + return line.startsWith(">") ? EntryType.FASTA : (line.startsWith("@") ? EntryType.FASTQ : null); + } + +} \ No newline at end of file diff --git a/datastore_server/sourceTest/java/ch/systemsx/cisd/etlserver/plugins/FastaFileBuilderTest.java b/datastore_server/sourceTest/java/ch/systemsx/cisd/etlserver/plugins/FastaFileBuilderTest.java new file mode 100644 index 0000000000000000000000000000000000000000..8804189e4b574d6a52a0b15a1ad551e1c5d01867 --- /dev/null +++ b/datastore_server/sourceTest/java/ch/systemsx/cisd/etlserver/plugins/FastaFileBuilderTest.java @@ -0,0 +1,196 @@ +/* + * Copyright 2014 ETH Zuerich, SIS + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package ch.systemsx.cisd.etlserver.plugins; + +import java.io.File; + +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +import ch.systemsx.cisd.base.tests.AbstractFileSystemTestCase; +import ch.systemsx.cisd.common.filesystem.FileUtilities; + + + +/** + * + * + * @author Franz-Josef Elmer + */ +public class FastaFileBuilderTest extends AbstractFileSystemTestCase +{ + private static final String DATA_SET_CODE = "11358-13"; + private File tempFolder; + private FastaFileBuilder builder; + + @BeforeMethod + public void setUpTempFolder() + { + tempFolder = new File(workingDirectory, "temp"); + tempFolder.mkdirs(); + builder = new FastaFileBuilder(tempFolder, DATA_SET_CODE); + } + + @Test + public void testThreeNuclEntriesFromTwoFastaFiles() + { + builder.setFilePath("my-data/1.fa"); + builder.handle(">lcl|1 example 1"); + builder.handle("GTTTACCCAAACTTCTATATGACTT"); + builder.handle("AAATTAAAATAATGCTGAGATGATA"); + builder.handle(">lcl|2 example 2"); + builder.handle("GACTTCTATATGATTTACCCAACTT"); + builder.handle("ATAATGCTGAATTAAAATAAGATGA"); + builder.setFilePath("my-data/2.fa"); + builder.handle(">lcl|3 example 3"); + builder.handle("GACTTCTTTATATGATTTACCCAACTTAGCGT"); + builder.finish(); + + assertEquals(null, builder.getTemporaryProtFastaFileOrNull()); + File temporaryNuclFastaFile = builder.getTemporaryNuclFastaFileOrNull(); + assertEquals(DATA_SET_CODE + "-nucl.fa", FileUtilities.getRelativeFilePath(tempFolder, temporaryNuclFastaFile)); + assertEquals(">lcl|1 example 1 [Data set: 11358-13, File: my-data/1.fa]\n" + + "GTTTACCCAAACTTCTATATGACTT\n" + + "AAATTAAAATAATGCTGAGATGATA\n" + + ">lcl|2 example 2 [Data set: 11358-13, File: my-data/1.fa]\n" + + "GACTTCTATATGATTTACCCAACTT\n" + + "ATAATGCTGAATTAAAATAAGATGA\n" + + ">lcl|3 example 3 [Data set: 11358-13, File: my-data/2.fa]\n" + + "GACTTCTTTATATGATTTACCCAACTTAGCGT", + FileUtilities.loadToString(temporaryNuclFastaFile).trim()); + } + + @Test + public void testFastqFiles() + { + builder.setFilePath("my-data/1.fastq"); + builder.handle("@lcl|1 example 1"); + builder.handle("GTTTACCCAAACTTCTATATGACTT"); + builder.handle("+"); + builder.handle("d^dddadd^BBBBBBefcfffffcc"); + builder.handle("@lcl|2 example 2"); + builder.handle("ATAATGCTGAATTAAAATAAGATGA"); + builder.handle("BBBefcfffffd^dddadd^BBBcc"); + builder.handle("@lcl|3 example 3"); + builder.handle("GACTTCTTTATATGATTTACCCAACTTAGCGT"); + builder.handle("@lcl|4 example 4"); + builder.handle("GACTTCTTTATATGCTTAGCGTATTTACCCAA"); + builder.handle("+"); + builder.finish(); + + assertEquals(null, builder.getTemporaryProtFastaFileOrNull()); + File temporaryNuclFastaFile = builder.getTemporaryNuclFastaFileOrNull(); + assertEquals(DATA_SET_CODE + "-nucl.fa", FileUtilities.getRelativeFilePath(tempFolder, temporaryNuclFastaFile)); + assertEquals(">lcl|1 example 1 [Data set: 11358-13, File: my-data/1.fastq]\n" + + "GTTTACCCAAACTTCTATATGACTT\n" + + ">lcl|2 example 2 [Data set: 11358-13, File: my-data/1.fastq]\n" + + "ATAATGCTGAATTAAAATAAGATGA\n" + + ">lcl|3 example 3 [Data set: 11358-13, File: my-data/1.fastq]\n" + + "GACTTCTTTATATGATTTACCCAACTTAGCGT\n" + + ">lcl|4 example 4 [Data set: 11358-13, File: my-data/1.fastq]\n" + + "GACTTCTTTATATGCTTAGCGTATTTACCCAA", + FileUtilities.loadToString(temporaryNuclFastaFile).trim()); + } + + @Test + public void testNuclFastaFileAndProtFastaFile() + { + builder.setFilePath("my-data/1.fa"); + builder.handle(">lcl|1 example 1"); + builder.handle("GTTTACCCAAACTTCTATATGACTT"); + builder.handle("AAATTAAAATAATGCTGAGATGATA"); + builder.handle(">lcl|2 example 2"); + builder.handle("GACTTCTATATGATTTACCCAACTT"); + builder.handle("ATAATGCTGAATTAAAATAAGATGA"); + builder.setFilePath("my-data/2.fa"); + builder.handle(">lcl|3 example 3"); + builder.handle("VGLTNYAAAYCTGLLLAR"); + builder.finish(); + + File temporaryNuclFastaFile = builder.getTemporaryNuclFastaFileOrNull(); + assertEquals(DATA_SET_CODE + "-nucl.fa", FileUtilities.getRelativeFilePath(tempFolder, temporaryNuclFastaFile)); + assertEquals(">lcl|1 example 1 [Data set: 11358-13, File: my-data/1.fa]\n" + + "GTTTACCCAAACTTCTATATGACTT\n" + + "AAATTAAAATAATGCTGAGATGATA\n" + + ">lcl|2 example 2 [Data set: 11358-13, File: my-data/1.fa]\n" + + "GACTTCTATATGATTTACCCAACTT\n" + + "ATAATGCTGAATTAAAATAAGATGA", + FileUtilities.loadToString(temporaryNuclFastaFile).trim()); + File temporaryProtFastaFile = builder.getTemporaryProtFastaFileOrNull(); + assertEquals(DATA_SET_CODE + "-prot.fa", FileUtilities.getRelativeFilePath(tempFolder, temporaryProtFastaFile)); + assertEquals(">lcl|3 example 3 [Data set: 11358-13, File: my-data/2.fa]\n" + + "VGLTNYAAAYCTGLLLAR", + FileUtilities.loadToString(temporaryProtFastaFile).trim()); + } + + @Test + public void testCleanUp() + { + builder.setFilePath("my-data/1.fa"); + builder.handle(">lcl|1 example 1"); + builder.handle("GTTTACCCAAACTTCTATATGACTT"); + builder.finish(); + File temporaryNuclFastaFile = builder.getTemporaryNuclFastaFileOrNull(); + assertEquals(true, temporaryNuclFastaFile.exists()); + + builder.cleanUp(); + + assertEquals(false, temporaryNuclFastaFile.exists()); + } + + @Test + public void testUnspecifiedFilePath() + { + try + { + builder.handle(">lcl|1"); + } catch (IllegalStateException ex) + { + assertEquals("File path not set [Data Set: 11358-13].", ex.getMessage()); + } + } + + @Test + public void testMissingIdLine() + { + builder.setFilePath("my-data/1.fa"); + try + { + builder.handle("GATTACA"); + } catch (IllegalStateException ex) + { + assertEquals("Invalid line [Data set: 11358-13, File: my-data/1.fa]. " + + "Line with identifier expected: GATTACA", ex.getMessage()); + } + } + + @Test + public void testMissingSequenceLine() + { + builder.setFilePath("my-data/1.fa"); + builder.handle(">lcl|1"); + try + { + builder.finish(); + } catch (IllegalStateException ex) + { + assertEquals("Unknown type of the following FASTA entry: " + + "[>lcl|1 [Data set: 11358-13, File: my-data/1.fa]]", ex.getMessage()); + } + } + +}