Skip to content
Snippets Groups Projects
Commit 26faf195 authored by kohleman
Browse files

- added filtering so that only fastq.gz files are included in the result

- now works for all ILLUMINA_SEQUENCING samples, not only the Undetermined (NOINDEX) ones

SVN: 34531
parent 1344332a
No related branches found
No related tags found
No related merge requests found
...@@ -9,6 +9,9 @@ import java.util.HashMap; ...@@ -9,6 +9,9 @@ import java.util.HashMap;
public class DbAccess public class DbAccess
{ {
private static final String FASTQ_GZ_SUFFIX = ".fastq.gz";
/** /**
* Connecting to the pathifno_DB * Connecting to the pathifno_DB
* @return Connection * @return Connection
...@@ -53,6 +56,7 @@ public class DbAccess ...@@ -53,6 +56,7 @@ public class DbAccess
ResultSet rs = null; ResultSet rs = null;
try try
{ {
System.out.println("Found data set with permID: " + permId);
rs = st.executeQuery("select dsf.id, dsf.file_name, dsf.checksum_crc32 from data_sets ds," rs = st.executeQuery("select dsf.id, dsf.file_name, dsf.checksum_crc32 from data_sets ds,"
+ " data_set_files dsf where ds.code='" + " data_set_files dsf where ds.code='"
+ permId + "' and ds.id =dsf.dase_id and dsf.is_directory = FALSE;"); + permId + "' and ds.id =dsf.dase_id and dsf.is_directory = FALSE;");
...@@ -62,7 +66,10 @@ public class DbAccess ...@@ -62,7 +66,10 @@ public class DbAccess
String fileName = rs.getString("file_name"); String fileName = rs.getString("file_name");
Integer checksum = rs.getInt("checksum_crc32"); Integer checksum = rs.getInt("checksum_crc32");
dataSetResult.put(fileName, checksum); if (fileName.endsWith(FASTQ_GZ_SUFFIX))
{
dataSetResult.put(fileName, checksum);
}
} }
} catch (SQLException e) } catch (SQLException e)
......
...@@ -79,10 +79,14 @@ public class create_metadata ...@@ -79,10 +79,14 @@ public class create_metadata
private static final String INDEX1_NOINDEX_VALUE = "NOINDEX"; private static final String INDEX1_NOINDEX_VALUE = "NOINDEX";
private static final String INDEX2_NOINDEX_VALUE = "NOINDEX";
private static final String DATASET_TYPE_CODE_FASTQ_GZ = "FASTQ_GZ"; private static final String DATASET_TYPE_CODE_FASTQ_GZ = "FASTQ_GZ";
private static final String INDEX1_PROPERTY_CODE = "BARCODE"; private static final String INDEX1_PROPERTY_CODE = "BARCODE";
private static final String INDEX2_PROPERTY_CODE = "INDEX2";
private static final String TSV_FLOWCELL_PROPERTIES = "FLOWCELL PROPERTIES"; private static final String TSV_FLOWCELL_PROPERTIES = "FLOWCELL PROPERTIES";
private static final String TSV_FASTQ_FILES = "FASTQ_FILES"; private static final String TSV_FASTQ_FILES = "FASTQ_FILES";
...@@ -155,17 +159,29 @@ public class create_metadata ...@@ -155,17 +159,29 @@ public class create_metadata
List<Sample> sampleList = searchSample(infoService, sessionToken, sampleCode, fetchOptions); List<Sample> sampleList = searchSample(infoService, sessionToken, sampleCode, fetchOptions);
if (sampleList.size() < 1) if (sampleList.size() < 1)
{ {
System.out.println(sampleCode + " Not found!"); System.out.println(sampleCode + " not found!");
} }
SortedMap<String, SortedMap<String, String>> sampleMap = getProperties(sampleList); SortedMap<String, SortedMap<String, String>> sampleMap = getProperties(sampleList);
SortedMap<String, SortedMap<String, String>> flowcellMap = null; SortedMap<String, SortedMap<String, String>> flowcellMap = null;
String flowcellCode = ""; String flowcellCode = "";
// Should be always a single sample
for (Sample sample : sampleList) for (Sample sample : sampleList)
{ {
List<Sample> children = sample.getChildren(); List<Sample> children = sample.getChildren();
String permId = extractDataSets(infoService, sessionToken, children); if (children.size() == 0)
{
System.out.println("Skipping..." + sample.getCode() + ". No children found for sample.");
continue;
}
String permId = extractDataSets(infoService, sessionToken, children, sample);
if (permId.equals(""))
{
System.out.println("Skipping..." + sample.getCode() + ". No Data Set found.");
continue;
}
HashMap<String, Integer> dbResult = DbAccess.doQuery(connection, permId); HashMap<String, Integer> dbResult = DbAccess.doQuery(connection, permId);
for (Sample child : children) for (Sample child : children)
{ {
...@@ -222,57 +238,59 @@ public class create_metadata ...@@ -222,57 +238,59 @@ public class create_metadata
SortedMap<String, SortedMap<String, String>> flowcellMap, HashMap<String, Integer> dbResult, SortedMap<String, SortedMap<String, String>> flowcellMap, HashMap<String, Integer> dbResult,
String outputFolder) String outputFolder)
{ {
assert flowcellMap.size() == 1; if (flowcellMap != null && flowcellMap.size() == 1)
SortedMap<String, String> flowcellProperties = flowcellMap.get(flowcellMap.firstKey());
for (String key : sampleMap.keySet())
{ {
SortedMap<String, String> currentSample = sampleMap.get(key); SortedMap<String, String> flowcellProperties = flowcellMap.get(flowcellMap.firstKey());
StringBuilder stringBuilder = new StringBuilder();
stringBuilder.append(key);
stringBuilder.append("_");
stringBuilder.append(flowcellProperties.get("RUN_NAME_FOLDER"));
stringBuilder.append("_metadata");
stringBuilder.append(TSV_ENDING);
Path path = Paths.get(outputFolder, stringBuilder.toString());
File metaDataFile = new File(path.toUri());
metaDataFile.getParentFile().mkdirs();
try
{
metaDataFile.createNewFile();
} catch (IOException e1)
{
e1.printStackTrace();
}
try for (String key : sampleMap.keySet())
{ {
BufferedWriter fOut = new BufferedWriter(new FileWriter(metaDataFile)); SortedMap<String, String> currentSample = sampleMap.get(key);
for (String propertyKey : currentSample.keySet())
StringBuilder stringBuilder = new StringBuilder();
stringBuilder.append(key.replace("-", "_"));
stringBuilder.append("_");
stringBuilder.append(flowcellProperties.get("RUN_NAME_FOLDER"));
stringBuilder.append("_metadata");
stringBuilder.append(TSV_ENDING);
Path path = Paths.get(outputFolder, stringBuilder.toString());
File metaDataFile = new File(path.toUri());
metaDataFile.getParentFile().mkdirs();
try
{ {
fOut.write(propertyKey + "\t" + currentSample.get(propertyKey) + "\n"); metaDataFile.createNewFile();
} } catch (IOException e1)
fOut.write("\n" + TSV_FLOWCELL_PROPERTIES + "\n");
for (String flowcellPropertyKey : flowcellProperties.keySet())
{ {
fOut.write(flowcellPropertyKey + "\t" + flowcellProperties.get(flowcellPropertyKey) + "\n"); e1.printStackTrace();
} }
fOut.write("\n" + TSV_FASTQ_FILES + "\n");
for (String fileName : dbResult.keySet()) try
{ {
fOut.write(fileName + "\t" + crc32ToString(dbResult.get(fileName)) + "\n"); BufferedWriter fOut = new BufferedWriter(new FileWriter(metaDataFile));
} for (String propertyKey : currentSample.keySet())
{
fOut.write(propertyKey + "\t" + currentSample.get(propertyKey) + "\n");
}
fOut.write("\n" + TSV_FLOWCELL_PROPERTIES + "\n");
fOut.close(); for (String flowcellPropertyKey : flowcellProperties.keySet())
System.out.println("Written " + metaDataFile); {
fOut.write(flowcellPropertyKey + "\t" + flowcellProperties.get(flowcellPropertyKey) + "\n");
}
fOut.write("\n" + TSV_FASTQ_FILES + "\n");
} catch (IOException e) for (String fileName : dbResult.keySet())
{ {
e.printStackTrace(); fOut.write(fileName + "\t" + crc32ToString(dbResult.get(fileName)) + "\n");
}
fOut.close();
System.out.println("Written " + metaDataFile);
} catch (IOException e)
{
e.printStackTrace();
}
} }
} }
} }
...@@ -286,16 +304,30 @@ public class create_metadata ...@@ -286,16 +304,30 @@ public class create_metadata
return sampleList; return sampleList;
} }
private static String extractDataSets(IGeneralInformationService infoService, String sessionToken, List<Sample> children) private static String extractDataSets(IGeneralInformationService infoService, String sessionToken,
List<Sample> children, Sample sample)
{ {
List<DataSet> flowLaneDatasets = infoService.listDataSets(sessionToken, children); List<DataSet> flowLaneDatasets = infoService.listDataSets(sessionToken, children);
String permId = ""; String permId = "";
Map<String, String> sampleProperties = sample.getProperties();
String sample_index1 = sampleProperties.get(INDEX1_PROPERTY_CODE);
if (sample_index1 == null)
{
sample_index1 = INDEX1_NOINDEX_VALUE;
}
String sample_index2 = sampleProperties.get(INDEX2_PROPERTY_CODE);
if (sample_index2 == null)
{
sample_index2 = INDEX2_NOINDEX_VALUE;
}
for (DataSet ds : flowLaneDatasets) for (DataSet ds : flowLaneDatasets)
{ {
HashMap<String, String> dsProperties = ds.getProperties(); HashMap<String, String> dsProperties = ds.getProperties();
String barcode = dsProperties.get(INDEX1_PROPERTY_CODE); String index1 = dsProperties.get(INDEX1_PROPERTY_CODE);
if (ds.getDataSetTypeCode().equals(DATASET_TYPE_CODE_FASTQ_GZ) && barcode.equals(INDEX1_NOINDEX_VALUE)) String index2 = dsProperties.get(INDEX2_PROPERTY_CODE);
if (ds.getDataSetTypeCode().equals(DATASET_TYPE_CODE_FASTQ_GZ) &&
index1.equals(sample_index1) && index2.equals(sample_index2))
{ {
permId = ds.getCode(); permId = ds.getCode();
return permId; return permId;
...@@ -373,7 +405,7 @@ public class create_metadata ...@@ -373,7 +405,7 @@ public class create_metadata
{ {
String cwd = System.getProperty("user.dir"); String cwd = System.getProperty("user.dir");
String[] arrayCwd = cwd.split("@"); // just use a split with a not valid char to convert the String into String [] String[] arrayCwd = cwd.split("@"); // just use a split with a not valid char to convert the String into String []
System.out.println("No outout folder specified! Will use: " + cwd); System.out.println("No output folder specified! Will use: " + cwd);
commandLineMap.put(CL_PARAMETER_OUTPUT_FOLDER, arrayCwd); commandLineMap.put(CL_PARAMETER_OUTPUT_FOLDER, arrayCwd);
} }
} catch (ParseException exp) } catch (ParseException exp)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment