diff --git a/MelArray/MelArray.snake b/MelArray/MelArray.snake
index 6f8ff2b578787b3a587aae957d1274cf3b7e60b6..f2beeb451438db0eb3ab858219a25d9a3bf3e96d 100644
--- a/MelArray/MelArray.snake
+++ b/MelArray/MelArray.snake
@@ -10,61 +10,150 @@
 # INIT #
 ########
 
+
+# import entire packages
 import os, logging
 
+# import some specific packages
 from timeit import default_timer as t
 from datetime import datetime
 
+# import the load_config function from the scripts folder
+from scripts.load_config import load_config
+
+
 # set up logging
 logging.basicConfig(level = logging.INFO, format = '#%(asctime)s | %(levelname)-7s | %(filename)-20s: %(message)s')
 
-logging.info('Starting snakemake pipeline ...')
+# store the original "config" dictionary that is created from the input parameters
+input_params = config
 
-# load the configuration file
+# load the configuration file from the "config.yaml" in the current working directory
 config_file_path = os.getcwd() + "/config.yaml"
 config = load_config(config_file_path)
 
-# get the list of files to process
-IDs, = glob_wildcards(IN_FOLDER + "{id}.fastq")
+# store the paths as a separate dictionary
+paths = config['path']
+
+# get the list of files to process using the regexp: [0-9]*.A-\([A-Za-z0-9-]*\)_[A-Za-z0-9_]*_R1.fastq.gz
+#    this creates 4 lists, one for each part of the regexp
+dates, lanes, IDs, others = glob_wildcards(paths['fastq'] +
+    "/{date,[0-9]*}.{lanes,[AB]}-{ID,[A-Za-z0-9-]*}_{other,[A-Za-z0-9_]*}_R1.fastq.gz")
+
+# check if the IDs were not also specified in the input parameters, in which case we should use those IDs
+IDs = input_params.get('IDs', IDs)
+# transform the comma-separated values string into a list of strings
+if type(IDs) == str: IDs = IDs.split(',')
+
+# get the mapping between the initial fastq file path and their ID
+FASTQ_R1_FILES, FASTQ_R2_FILES, i_ID = {}, {}, 0
+# loops through all the IDs
+for ID in IDs:
+    # create the full path for the R1 read and store it in the dictionary under the [ID] key
+    FASTQ_R1_FILES[ID] = paths['fastq'] + '/' + dates[i_ID] + '.' + lanes[i_ID] + '-' + ID + '_' + others[i_ID] + '_R1.fastq.gz'
+    # create the full path for the R2 read and store it in the dictionary under the [ID] key
+    FASTQ_R2_FILES[ID] = paths['fastq'] + '/' + dates[i_ID] + '.' + lanes[i_ID] + '-' + ID + '_' + others[i_ID] + '_R2.fastq.gz'
+    # increment the counter
+    i_ID += 1
+
+
+# piece of code to be executed before the pipeline starts
+onstart:
 
-# check if IDs were not specified in config
-IDs = config.get('ids', IDs)
-if type(IDs) == str:
-    IDs = IDs.split(',')
+    logging.info('Starting MelArray Snakemake pipeline ...')
+    logging.info("IDs:")
+    for ID in IDs:
+        logging.info("      " + ID)
 
 
 #########
 # RULES #
 #########
 
+# final step that will gather all the outputs
 rule all:
     input:
-        expand(OUT_FOLDER + '{id}.bam', id = IDs)
+        expand(paths['dedup'] + '/{id}_sorted_dedup.bam', id = IDs),
+        expand(paths['dedup'] + '/{id}_metrics.txt', id = IDs)
+
+
     run:
         logging.info('Pipeline completed.')
 
 
-rule step1:
+# trimming step
+rule skewer:
     input:
-        IN_FOLDER + '{id}.fastq'
+        # inputs are generated by looking up the path corresponding to the current ID
+        fastq_R1 = lambda wildcards: FASTQ_R1_FILES[wildcards.id],
+        fastq_R2 = lambda wildcards: FASTQ_R2_FILES[wildcards.id],
     output:
-        TMP_FOLDER + '{id}.sam'
+        trimmed_R1 = paths['trimmed'] + '/{id}-trimmed-pair1.fastq.gz',
+        trimmed_R2 = paths['trimmed'] + '/{id}-trimmed-pair2.fastq.gz'
+    log:
+        logfile = paths['trimmed'] + '/{id}_skewer_output.log'
     shell:
-        'cat {input} > {output}'
-
-
-rule step2:
+        # create the command line using the parameters contained in config['skewer']
+        'skewer -t {n_threads} -m {trimming_mode} -q {quality_threshold} -z --quiet '.format(**config['skewer']) +
+            # add the output to be in the 'trimmed' folder and to have the [ID] prefix
+            '-o {OUT_FOLDER_PATH}/{{wildcards.id}} '.format(OUT_FOLDER_PATH = paths['trimmed']) +
+            # finish the command by using the input paths that Snakemake will automatically replace
+            '{input.fastq_R2} {input.fastq_R1} ' +
+            # dump all standard and error output to the log file
+            '&> {log.logfile}'
+
+
+# alignment and sorting step
+rule bwa_bam:
     input:
-        TMP_FOLDER + '{id}.sam'
+        # use the outputs of the previous step as inputs for the current step
+        trimmed_R1 = rules.skewer.output.trimmed_R1,
+        trimmed_R2 = rules.skewer.output.trimmed_R2
     output:
-        OUT_FOLDER + '{id}.bam'
+        sorted_bam = paths['sorted'] + '/{id}_sorted.bam'
+    log:
+        logfile = paths['sorted'] + '/{id}_bwa_bam_output.log'
     shell:
-        'cat {input} > {output}'
+        # create the command line using the parameters contained in config['bwa']
+        'bwa mem -t {n_threads} -M -R "@RG\tID:{ID}\tLB:{LB}\tSM:{{wildcards.id}}\tPL:{PL}\tPU:{PU}" '.format(**config['bwa']) +
+            # complete the bwa command with the reference genome and the two inputs
+            '{ref_genome} {{input.trimmed_R1}} {{input.trimmed_R2}} '.format(**paths) +
+            # dump all standard and error output to the log file
+            '&> {log.logfile} ' +
+        # pipe the output into the picard tools with the amount of memory specified in the config
+        '| java -Xmx{memory}G '.format(**config['bwa']) +
+            # specify the picard JAR path and the output path
+            '-jar {picard} SortSam I=/dev/stdin O={{output.sorted_bam}} '.format(**paths) +
+            # complete the picard command with the static parameters
+            'CREATE_INDEX=true VALIDATION_STRINGENCY=SILENT SO=coordinate ' +
+            # dump all standard and error output to the log file
+            '&> {log.logfile}'
+
+# marking duplicates step
+rule mark_duplicates:
+    input:
+        # use the output of the previous step as input for the current step
+        sorted_bam = rules.bwa_bam.output.sorted_bam
+    output:
+        dedup_bam = paths['dedup'] + '/{id}_sorted_dedup.bam',
+        metrics = paths['dedup'] + '/{id}_metrics.txt'
+    log:
+        logfile = paths['dedup'] + '/{id}_sorted_dedup_output.log'
+    shell:
+        # create the command for the picard tools with the amount of memory specified in the config
+        'java -Xmx{memory}G '.format(**config['bwa']) +
+            # specify the picard JAR path and the output path
+            '-jar {picard} MarkDuplicates I={{input.sorted_bam}} O={{output.dedup_bam}} '.format(**paths) +
+            # complete the command with the output of the metrics file and dump all standard and error output to the log file
+            'M={output.metrics} &> {log.logfile}'
+
 
 
+# piece of code to be  executed after the pipeline ends *without* error
 onsuccess:
     logging.info('Pipeline completed successfully.')
 
+# piece of code to be  executed after the pipeline ends *with* an error
 onerror:
     logging.info('Pipeline failed.')
 
diff --git a/MelArray/config.yaml b/MelArray/config.yaml
index 2443b4af707b0b091aedd947ed6f3ffc8959d5df..cd3b6c22004db01e2be14ab87e1fb16d84f9f9c7 100644
--- a/MelArray/config.yaml
+++ b/MelArray/config.yaml
@@ -9,11 +9,77 @@
 # @date: 2017
 
 
+# TODO: make it flexible for single end and paired-end
+
+
 # file and folder paths
 path:
 
-  # root path for the code/scripts
-  root: "/vagrant/MelArray"
+  # root path for the pipeline / project
+  project_root: "/vagrant/MelArray"
+
+
+  # root path for the data folders
+  data_root: "__PROJECT_ROOT__/data"
+
+  # path to the folder where the original fastq files are stored
+  fastq: "__DATA_ROOT__/fastq"
+
+  # folder where the trimmed fastq files are stored
+  trimmed: "__DATA_ROOT__/trimmed"
+
+  # folder where the sorted bam files are stored
+  sorted: "__DATA_ROOT__/sorted"
+
+  # folder where the sorted & deduplicated bam files are stored
+  dedup: "__DATA_ROOT__/sorted_dedup"
+
+
+  # root path for the reference files
+  ref_root: "__PROJECT_ROOT__/ref"
+
+  # path where the reference fasta genome is stored
+#  ref_genome: "/data/Phil/ref_phil/GATK_resource/hg19/ucsc.hg19.fasta"
+  ref_genome: "__REF_ROOT__/hg19/ucsc.hg19.fasta"
+
+
+  # root path for the software & tools
+  soft_root: "__PROJECT_ROOT__/software"
+
+  # path where the picard tools JAR file is located
+#  picard: "/data/Phil/software/picard-tools-2.4.1/picard.jar"
+  picard: "__SOFT_ROOT__/picard-tools-2.9.0/picard.jar"
+
+
+
+
+# parameters for skewer
+skewer:
+
+  # number of threads (-t)
+  n_threads: "2"
+
+  # quality-based trimming or filtering (-q)
+  quality_threshold: "20"
+
+  # trimming mode (-m, values: 'head', 'tail', 'any', 'pe', 'mp', 'ap')
+  trimming_mode: "pe"
+
+
+# parameters for bwa
+bwa:
+
+  # number of threads (-t)
+  n_threads: "2"
+
+  # amount of RAM for Java (-Xmx..G)
+  memory: "14"
+
+  # header read group columns
+  ID: "Melanoma"
+  LB: "Melarray"
+  PL: "ILLUMINA"
+  PU: "HiSEQ4000"
 
-  # folder where the original fastq data is
-  fastq: "__ROOT__/fastq"
+  # trimming mode (-m, values: 'head', 'tail', 'any', 'pe', 'mp', 'ap')
+  trimming_mode: "pe"
diff --git a/MelArray/other/create_dag.sh b/MelArray/other/create_dag.sh
new file mode 100644
index 0000000000000000000000000000000000000000..212c4ba239ecdbd15c70c05b9336c32175dc8c5c
--- /dev/null
+++ b/MelArray/other/create_dag.sh
@@ -0,0 +1 @@
+#!/usr/bin/env bash
\ No newline at end of file
diff --git a/MelArray/other/dag_v01.dag b/MelArray/other/dag_v01.dag
new file mode 100644
index 0000000000000000000000000000000000000000..a4f544c6e8b55464550af1de6470730fcbc6f0d2
--- /dev/null
+++ b/MelArray/other/dag_v01.dag
@@ -0,0 +1,14 @@
+digraph snakemake_dag {
+    graph[bgcolor=white, margin=0];
+    node[shape=box, style=rounded, fontname=sans,                 fontsize=10, penwidth=2];
+    edge[penwidth=2, color=grey];
+	0[label = "skewer\nid: PBMCPatient1", color = "0.33 0.6 0.85", style="rounded,dashed"];
+	1[label = "all", color = "0.00 0.6 0.85", style="rounded"];
+	2[label = "skewer\nid: MTBPatient2", color = "0.33 0.6 0.85", style="rounded,dashed"];
+	3[label = "skewer\nid: MTBPatient1", color = "0.33 0.6 0.85", style="rounded,dashed"];
+	4[label = "skewer\nid: PBMCPatient2", color = "0.33 0.6 0.85", style="rounded,dashed"];
+	0 -> 1
+	2 -> 1
+	3 -> 1
+	4 -> 1
+}            
diff --git a/MelArray/other/dag_v01.png b/MelArray/other/dag_v01.png
new file mode 100644
index 0000000000000000000000000000000000000000..c7508bab5aa5b4ae52ab14fb8b01d8028131a074
Binary files /dev/null and b/MelArray/other/dag_v01.png differ
diff --git a/MelArray/other/dag_v01_hori.png b/MelArray/other/dag_v01_hori.png
new file mode 100644
index 0000000000000000000000000000000000000000..cb706a4d7e4251d356687b96af34c4d8dd5386de
Binary files /dev/null and b/MelArray/other/dag_v01_hori.png differ
diff --git a/MelArray/other/dag_v01_vert.png b/MelArray/other/dag_v01_vert.png
new file mode 100644
index 0000000000000000000000000000000000000000..7a4bf95b700017e2158877dc0986c671dd46faf0
Binary files /dev/null and b/MelArray/other/dag_v01_vert.png differ
diff --git a/MelArray/other/dag_v02.dag b/MelArray/other/dag_v02.dag
new file mode 100644
index 0000000000000000000000000000000000000000..611be4624290830d495d0181f01fc82a6967bf07
--- /dev/null
+++ b/MelArray/other/dag_v02.dag
@@ -0,0 +1,30 @@
+digraph snakemake_dag {
+    graph[bgcolor=white, margin=0];
+    node[shape=box, style=rounded, fontname=sans,                 fontsize=10, penwidth=2];
+    edge[penwidth=2, color=grey];
+	0[label = "bwa_bam", color = "0.50 0.6 0.85", style="rounded,dashed"];
+	1[label = "mark_duplicates", color = "0.33 0.6 0.85", style="rounded"];
+	2[label = "bwa_bam", color = "0.50 0.6 0.85", style="rounded,dashed"];
+	3[label = "skewer\nid: PBMCpatient1", color = "0.17 0.6 0.85", style="rounded,dashed"];
+	4[label = "bwa_bam", color = "0.50 0.6 0.85", style="rounded,dashed"];
+	5[label = "mark_duplicates", color = "0.33 0.6 0.85", style="rounded"];
+	6[label = "bwa_bam", color = "0.50 0.6 0.85", style="rounded,dashed"];
+	7[label = "skewer\nid: MTBPatient2", color = "0.17 0.6 0.85", style="rounded,dashed"];
+	8[label = "mark_duplicates", color = "0.33 0.6 0.85", style="rounded"];
+	9[label = "mark_duplicates", color = "0.33 0.6 0.85", style="rounded"];
+	10[label = "skewer\nid: MTBPatient1", color = "0.17 0.6 0.85", style="rounded,dashed"];
+	11[label = "all", color = "0.00 0.6 0.85", style="rounded"];
+	12[label = "skewer\nid: PBMCpatient2", color = "0.17 0.6 0.85", style="rounded,dashed"];
+	10 -> 0
+	0 -> 1
+	7 -> 2
+	12 -> 4
+	2 -> 5
+	3 -> 6
+	6 -> 8
+	4 -> 9
+	5 -> 11
+	1 -> 11
+	8 -> 11
+	9 -> 11
+}            
diff --git a/MelArray/other/dag_v02_hori.png b/MelArray/other/dag_v02_hori.png
new file mode 100644
index 0000000000000000000000000000000000000000..a6092350b8ba4045361758eb2cd6b7f97afe0ca0
Binary files /dev/null and b/MelArray/other/dag_v02_hori.png differ
diff --git a/MelArray/other/dag_v02_vert.png b/MelArray/other/dag_v02_vert.png
new file mode 100644
index 0000000000000000000000000000000000000000..0a401e639617b597dbc127148cc7b4075e95ebe6
Binary files /dev/null and b/MelArray/other/dag_v02_vert.png differ
diff --git a/MelArray/original_bash_scripts/Mel_1_skewer_170228.sh b/MelArray/other/original_bash_scripts/Mel_1_skewer_170228.sh
similarity index 100%
rename from MelArray/original_bash_scripts/Mel_1_skewer_170228.sh
rename to MelArray/other/original_bash_scripts/Mel_1_skewer_170228.sh
diff --git a/MelArray/original_bash_scripts/Mel_2_bwa_GATK_170228.sh b/MelArray/other/original_bash_scripts/Mel_2_bwa_GATK_170228.sh
similarity index 100%
rename from MelArray/original_bash_scripts/Mel_2_bwa_GATK_170228.sh
rename to MelArray/other/original_bash_scripts/Mel_2_bwa_GATK_170228.sh
diff --git a/MelArray/other/rule_graph_v01.dag b/MelArray/other/rule_graph_v01.dag
new file mode 100644
index 0000000000000000000000000000000000000000..40c5e0a4a4ffe3410d35e32ec6be43a8b0445f27
--- /dev/null
+++ b/MelArray/other/rule_graph_v01.dag
@@ -0,0 +1,8 @@
+digraph snakemake_dag {
+    graph[bgcolor=white, margin=0];
+    node[shape=box, style=rounded, fontname=sans,                 fontsize=10, penwidth=2];
+    edge[penwidth=2, color=grey];
+	0[label = "all", color = "0.00 0.6 0.85", style="rounded"];
+	1[label = "skewer", color = "0.33 0.6 0.85", style="rounded"];
+	1 -> 0
+}            
diff --git a/MelArray/other/rule_graph_v01.png b/MelArray/other/rule_graph_v01.png
new file mode 100644
index 0000000000000000000000000000000000000000..2e3adb3b98dd1e773858a75de89ece46a29aadac
Binary files /dev/null and b/MelArray/other/rule_graph_v01.png differ
diff --git a/MelArray/other/rule_graph_v01_hori.png b/MelArray/other/rule_graph_v01_hori.png
new file mode 100644
index 0000000000000000000000000000000000000000..5acf11c10e6a68fa99a14a6d483a3d9e5d47bbd5
Binary files /dev/null and b/MelArray/other/rule_graph_v01_hori.png differ
diff --git a/MelArray/other/rule_graph_v01_vert.png b/MelArray/other/rule_graph_v01_vert.png
new file mode 100644
index 0000000000000000000000000000000000000000..45e0ade85db7a112e4ddf2435d98f07afb751ecd
Binary files /dev/null and b/MelArray/other/rule_graph_v01_vert.png differ
diff --git a/MelArray/other/rule_graph_v02.dag b/MelArray/other/rule_graph_v02.dag
new file mode 100644
index 0000000000000000000000000000000000000000..85b31a4b3c2d65ceeb5f3cf1b2a1abae84d60253
--- /dev/null
+++ b/MelArray/other/rule_graph_v02.dag
@@ -0,0 +1,12 @@
+digraph snakemake_dag {
+    graph[bgcolor=white, margin=0];
+    node[shape=box, style=rounded, fontname=sans,                 fontsize=10, penwidth=2];
+    edge[penwidth=2, color=grey];
+	0[label = "bwa_bam", color = "0.50 0.6 0.85", style="rounded"];
+	1[label = "all", color = "0.17 0.6 0.85", style="rounded"];
+	2[label = "skewer", color = "0.33 0.6 0.85", style="rounded"];
+	3[label = "mark_duplicates", color = "0.00 0.6 0.85", style="rounded"];
+	2 -> 0
+	3 -> 1
+	0 -> 3
+}            
diff --git a/MelArray/other/rule_graph_v02_hori.png b/MelArray/other/rule_graph_v02_hori.png
new file mode 100644
index 0000000000000000000000000000000000000000..4b09653ac52321a4de09ce5c1e83bdd22698f460
Binary files /dev/null and b/MelArray/other/rule_graph_v02_hori.png differ
diff --git a/MelArray/other/rule_graph_v02_vert.png b/MelArray/other/rule_graph_v02_vert.png
new file mode 100644
index 0000000000000000000000000000000000000000..e2bb7e1be4eecbe45a689c519c6f2d133177d6ef
Binary files /dev/null and b/MelArray/other/rule_graph_v02_vert.png differ
diff --git a/MelArray/scripts/load_config.py b/MelArray/scripts/load_config.py
index 4906c0275770a5c52cfffff9886eb2b60c851ad8..bc839b5b5a142d60fc4104633c0a00185383d29f 100644
--- a/MelArray/scripts/load_config.py
+++ b/MelArray/scripts/load_config.py
@@ -1,6 +1,7 @@
 
 import logging
 import yaml
+import re
 
 
 def load_config(config_file_path):
@@ -11,11 +12,34 @@ def load_config(config_file_path):
     @date: 2017
     """
 
-    logging.debug("Loading config file from %s", config_file_path)
+    logging.debug("Loading config file from %s ...", config_file_path)
     with open(config_file_path, 'r') as config_file:
         config = yaml.load(config_file)
 
-    # initialize the path(s) by replacing the '__ROOT__' tag with the actual root path
-    config["path"]["fastq"] = config["path"]["fastq"].replace("__ROOT__", config["path"]['root'])
+    # exract the paths as a separate dictionary
+    paths = config['path']
+
+    # do the "root" parts replacing in the paths
+    logging.debug("Replacing '__XXXX_ROOT__' tags ...", config_file_path)
+    is_match = True
+    # repeat until no more roots have been replaced
+    while is_match:
+        is_match = False
+        # loop through each key/value pair in the "path" dictionary
+        for key, value in paths.items():
+            # replace the "__XXX_ROOT__" tags in the path with their corresponding path entry
+            #   for example: "__DATA_ROOT__" should be replaced with paths['data_root']
+            if re.match('__\w+_ROOT__', value):
+                # a match was found, make sure to loop again
+                is_match = True
+                # extract the different parts of the path
+                re_match = re.match('(.*)__(\w+)_ROOT__(.*)', value)
+                # get the root's name, like "DATA" or "REF"
+                root_name = re_match.groups()[1]
+                # get the lower case version ("data" or "ref")
+                root_name_lower = root_name.lower()
+                # replace the path: for example "__DATA_ROOT__" with paths['data_root']
+                paths[key] = value.replace("__" + root_name + "_ROOT__", paths[root_name_lower + '_root'])
+
 
     return config
\ No newline at end of file
diff --git a/Vagrantfile b/Vagrantfile
index 1a55195885faf232a8239365cc6c5090028c0619..a75540887be3c24a22a91df5e32733991e8607e5 100644
--- a/Vagrantfile
+++ b/Vagrantfile
@@ -37,20 +37,23 @@ Vagrant.configure("2") do |config|
   # the path on the host to the actual folder. The second argument is
   # the path on the guest to mount the folder. And the optional third
   # argument is a set of non-required options.
-  # config.vm.synced_folder "src", "target"
+  config.vm.synced_folder "/data/USZ/data", "/vagrant/MelArray/data"
+  config.vm.synced_folder "/data/USZ/ref", "/vagrant/MelArray/ref"
+  config.vm.synced_folder "/data/USZ/software", "/vagrant/MelArray/software"
 
   # Provider-specific configuration so you can fine-tune various
   # backing providers for Vagrant. These expose provider-specific options.
   # Example for VirtualBox:
-  #
-  # config.vm.provider "virtualbox" do |vb|
-  #   # Display the VirtualBox GUI when booting the machine
-  #   vb.gui = true
-  #
-  #   # Customize the amount of memory on the VM:
-  #   vb.memory = "1024"
-  # end
-  #
+
+   config.vm.provider "virtualbox" do |vb|
+     # Display the VirtualBox GUI when booting the machine
+     vb.gui = false
+     # Customize the amount of memory on the VM:
+     vb.memory = "14336"
+     # Customize the amount of CPUs on the VM:
+     vb.cpus = "2"
+   end
+
   # View the documentation for the provider you are using for more
   # information on available options.
 
diff --git a/pipeline1/dag.png b/pipeline1/dag.png
new file mode 100644
index 0000000000000000000000000000000000000000..d46adae2e744f4778a03804a9ea85ee57f112e85
Binary files /dev/null and b/pipeline1/dag.png differ
diff --git a/pipeline1/dag_partial.png b/pipeline1/dag_partial.png
new file mode 100644
index 0000000000000000000000000000000000000000..5d4d50d5a8bfc53d9e59807ef18272f406ee6fac
Binary files /dev/null and b/pipeline1/dag_partial.png differ
diff --git a/pipeline1/rule_graph.png b/pipeline1/rule_graph.png
new file mode 100644
index 0000000000000000000000000000000000000000..3e9b3beee7df54662265b3faf73770a254add54b
Binary files /dev/null and b/pipeline1/rule_graph.png differ