diff --git a/moderation_classifier/eval_BERT.py b/moderation_classifier/eval_BERT.py
index 2ff388f35cdd3b9d8043f9d3efe79a42aa28331e..db645557710da7b5beccb304f461c761c7ef0bbb 100644
--- a/moderation_classifier/eval_BERT.py
+++ b/moderation_classifier/eval_BERT.py
@@ -1,22 +1,19 @@
-from datasets import load_dataset
-from evaluate import evaluator
-from transformers import pipeline, AutoTokenizer, TFAutoModelForSequenceClassification
+from transformers import AutoTokenizer, TFAutoModelForSequenceClassification
 import tensorflow as tf
 import click
-import evaluate
 import numpy as np
 import os
 import pandas as pd
 from pathlib import Path
-import timeit
-from tqdm import tqdm
 from typing import Union
 
 from sklearn.metrics import precision_recall_fscore_support, accuracy_score
 
 from src.preprocessing_text import TextLoader, TextProcessor
-from src.prepare_bert_tf import df2dict
+from src.train_logs import load_logs
+from src.BERT_utils import predict_batches
+from src.eval_utils import gen_scores_dict
 
 
 @click.argument("train_logs")
@@ -27,21 +24,28 @@ def main(train_logs: Union[str, os.PathLike]):
     """
 
     # Load logs
-    df = pd.read_csv(train_logs, index_col="Unnamed: 0")
-    path_repo = df.loc["path_repo"].values[0]
-    path_model = df.loc["path_model"].values[0]
-    input_data = df.loc["input_data"].values[0].replace("train", "test")
-    text_preprocessing = df.loc["text_preprocessing"].values[0]
+    (
+        path_repo,
+        path_model,
+        input_data,
+        text_preprocessing,
+        newspaper,
+        lang,
+        topic,
+        remove_duplicates,
+        min_num_words,
+    ) = load_logs(train_logs)
 
     # Load data and extract only text from tagesanzeiger
     print("Load and preprocess text")
     tl = TextLoader(input_data)
     df_de = tl.load_text_csv(
-        newspaper="tagesanzeiger",
-        lang='de',
+        newspaper=newspaper,
+        lang=lang,
+        topic=topic,
         load_subset=False,
-        remove_duplicates=True,
-        min_num_words=3,
+        remove_duplicates=remove_duplicates,
+        min_num_words=min_num_words,
     )
 
     if text_preprocessing:
@@ -49,37 +53,24 @@ def main(train_logs: Union[str, os.PathLike]):
         text_proc = tp.fit_transform(df_de.text)
         df_de.text = text_proc
 
     comon_topics = tl.get_comments_per_topic(df_de)
-    
+
     # Load tokenizer and model
     tokenizer = AutoTokenizer.from_pretrained("bert-base-german-cased")
-    model = TFAutoModelForSequenceClassification.from_pretrained(pretrained_model_name_or_path=path_model)
-
-    # Split text into batches
-    text_list = list(df_de.text.values)
-    n = 100
-    results=[text_list[idx:idx+n] for idx in range(0, len(text_list), n)]
-    import pdb; pdb.set_trace()
-    # eval all
-    y_pred_all = []
-    for batch in tqdm(results):
-        inputs = tokenizer(batch, return_tensors="tf", padding=True, truncation=True)
-        logits = model(**inputs).logits
-        y_pred_batch = tf.argmax(logits,axis=1)
-        y_pred_all.append(y_pred_batch)
+    model = TFAutoModelForSequenceClassification.from_pretrained(
+        pretrained_model_name_or_path=path_model
+    )
 
-    y_pred_all = np.concatenate(y_pred_all)
+    # Predict in batches
+    y_pred_all = predict_batches(df_de.text.values, model, tokenizer)
 
+    # eval all
     precision, recall, f1, _ = precision_recall_fscore_support(
-        df_de.label, y_pred_all, average='weighted'
+        df_de.label, y_pred_all, average="weighted"
     )
     accuracy = accuracy_score(df_de.label, y_pred_all)
-    results_all = dict()
-    results_all["precision"] = precision
-    results_all["recall"] = recall
-    results_all["f1"] = f1
-    results_all["accuracy"] = accuracy
-
+    results_all = gen_scores_dict(precision, recall, f1, accuracy)
+
     # eval per topic
     topics = [t[0] for t in comon_topics]
     results_t = dict()
@@ -89,20 +80,16 @@ def main(train_logs: Union[str, os.PathLike]):
         y_pred_t = y_pred_all[df_de.topic == t]
 
         precision, recall, f1, _ = precision_recall_fscore_support(
-            y_test_t, y_pred_t, average='weighted'
+            y_test_t, y_pred_t, average="weighted"
         )
         accuracy = accuracy_score(y_test_t, y_pred_t)
-        results_t[t] = dict()
-        results_t[t]["accuracy"] = accuracy
-        results_t[t]["f1"] = f1
-        results_t[t]["precision"] = precision
-        results_t[t]["recall"] = recall
+
+        results_t[t] = gen_scores_dict(precision, recall, f1, accuracy)
 
     # Compute rejection rate
     reject_rate_all = np.round(df_de.label.mean(), 4) * 100
     reject_rate_topic = [
-        np.round(df_de[df_de.topic == k].label.mean(), 4) * 100
-        for k in topics
+        np.round(df_de[df_de.topic == k].label.mean(), 4) * 100 for k in topics
     ]
 
     # Compute number comments
@@ -110,9 +97,7 @@ def main(train_logs: Union[str, os.PathLike]):
     num_comm_topic = [df_de[df_de.topic == k].shape[0] for k in topics]
 
     # Save results
-    df_res_all = pd.DataFrame().from_dict(
-        results_all, orient="index", columns=["all"]
-    )
+    df_res_all = pd.DataFrame().from_dict(results_all, orient="index", columns=["all"])
     df_res_all.loc["rejection rate"] = reject_rate_all
     df_res_all.loc["number comments"] = num_comm_all
 
@@ -123,7 +108,9 @@ def main(train_logs: Union[str, os.PathLike]):
     df_res = df_res_all.join(df_res_topic)
     df_res.loc["data"] = [input_data] * df_res.shape[1]
 
-    df_res.to_csv(path_repo + "/results/results_eval_BERT/" + Path(path_model).stem + ".csv")
+    df_res.to_csv(
+        path_repo + "/results/results_eval_BERT/" + Path(path_model).stem + ".csv"
+    )
 
 
 if __name__ == "__main__":
diff --git a/moderation_classifier/eval_MNB.py b/moderation_classifier/eval_MNB.py
index 622e12d2eedceb6974efe68748d4d39d6b1fd4c1..3b1697fa4a6aaf749c5574de07bb6d369350a9bc 100644
--- a/moderation_classifier/eval_MNB.py
+++ b/moderation_classifier/eval_MNB.py
@@ -1,24 +1,17 @@
 import click
 from collections import Counter
-from joblib import load
 import numpy as np
 import pandas as pd
 from pathlib import Path
-from sklearn.metrics import f1_score, precision_recall_fscore_support
+from sklearn.metrics import precision_recall_fscore_support
 from typing import Union
 import os
 
+from src.MNB_utils import load_model
 from src.preprocessing_text import TextLoader
-
-
-def load_model(path):
-    """
-    Loads trained model
-    """
-    pipe = load(path)
-
-    return pipe
+from src.train_logs import load_logs
+from src.eval_utils import gen_scores_dict
 
 
 @click.argument("train_logs")
@@ -29,9 +22,17 @@ def main(train_logs: Union[str, os.PathLike]):
     """
 
     # Load logs
-    df = pd.read_csv(train_logs, index_col="Unnamed: 0")
-    path_model = df.loc["path_model"].values[0]
-    input_data = df.loc["input_data"].values[0].replace("train", "test")
+    (
+        path_repo,
+        path_model,
+        input_data,
+        _,
+        newspaper,
+        lang,
+        topic,
+        remove_duplicates,
+        min_num_words,
+    ) = load_logs(train_logs)
 
     # Load model
     pipe = load_model(path_model)
@@ -39,32 +40,27 @@ def main(train_logs: Union[str, os.PathLike]):
     # Load test data
     tl = TextLoader(input_data)
     df_test = tl.load_text_csv(
-        newspaper="tagesanzeiger",
-        lang='de',
+        newspaper=newspaper,
+        lang=lang,
+        topic=topic,
         load_subset=False,
-        remove_duplicates=False,
-        min_num_words=3,
+        remove_duplicates=remove_duplicates,
+        min_num_words=min_num_words,
     )
-    
+
     X_test = df_test.text
     y_test = df_test.label
 
     # Make prediction
     y_pred = pipe.predict(X_test)
-    y_pred_t = pipe.predict(X_test)
 
+    # Compute scores and add to dict
     precision, recall, f1, _ = precision_recall_fscore_support(
-        y_test,
-        y_pred, average='weighted'
+        y_test, y_pred, average="weighted"
    )
    accuracy = pipe.score(X_test, y_test)
-    results_all = dict()
-    results_all["precision"] = precision
-    results_all["recall"] = recall
-    results_all["f1"] = f1
-    results_all["accuracy"] = accuracy
-
-    #import pdb; pdb.set_trace()
+    results_all = gen_scores_dict(precision, recall, f1, accuracy)
 
     # Get results per topic
     count_topics = Counter(df_test["topic"]).most_common(10)
@@ -77,15 +73,11 @@ def main(train_logs: Union[str, os.PathLike]):
         y_pred_t = pipe.predict(X_test_t)
 
         precision, recall, f1, _ = precision_recall_fscore_support(
-            y_test_t, y_pred_t, average='weighted'
+            y_test_t, y_pred_t, average="weighted"
         )
-        #f1 = f1_score(y_test_t, y_pred_t)
         accuracy = pipe.score(X_test_t, y_test_t)
-        results_t[t] = dict()
-        results_t[t]["accuracy"] = accuracy
-        results_t[t]["f1"] = f1
-        results_t[t]["precision"] = precision
-        results_t[t]["recall"] = recall
+
+        results_t[t] = gen_scores_dict(precision, recall, f1, accuracy)
 
     # Compute rejection rate
     reject_rate_all = np.round(df_test.label.mean(), 4) * 100
@@ -109,7 +101,9 @@ def main(train_logs: Union[str, os.PathLike]):
     df_res = df_res_all.join(df_res_topic)
     df_res.loc["data"] = [input_data] * df_res.shape[1]
 
-    df_res.to_csv("results/results_eval_MNB/" + Path(path_model).stem + ".csv")
+    df_res.to_csv(
+        path_repo + "/results/results_eval_MNB/" + Path(path_model).stem + ".csv"
+    )
 
 
 if __name__ == "__main__":
diff --git a/moderation_classifier/train_BERT.py b/moderation_classifier/train_BERT.py
index 1fd2ee1596b6d9cb70d0d2815e4a47a65339c5f2..49a25ee27079058af05707317ce8dd091f976d1f 100644
--- a/moderation_classifier/train_BERT.py
+++ b/moderation_classifier/train_BERT.py
@@ -16,34 +16,7 @@ from typing import Union
 
 from src.preprocessing_text import TextLoader, TextProcessor
 from src.prepare_bert_tf import df2dict, compute_metrics, prepare_training
-
-
-def save_logs(
-    path_repo: Union[str, os.PathLike],
-    path: Union[str, os.PathLike],
-    input_data: Union[str, os.PathLike],
-    text_preprocessing: bool,
-):
-    """
-    Saves training logs whch can be used during evaluation
-    :param path_repo: Path to repository
-    :param path: Path to trained model
-    :param input_data: Path to used train data
-    :param text_preprocessing: Boolean flag whether preprocessing was used or not
-    """
-    logs = dict()
-    logs["path_repo"] = path_repo
-    logs["path_model"] = path
-    logs["input_data"] = input_data
-    logs["text_preprocessing"] = text_preprocessing
-
-    path_logs = (path_repo).joinpath("saved_models/BERT_logs/")
-    if not os.path.exists(path_logs):
-        os.makedirs(path_logs)
-
-    df_logs = pd.DataFrame.from_dict(logs, orient="index", columns=["logs"])
-
-    df_logs.to_csv(path_logs.joinpath(path.stem).with_suffix(".csv"))
+from src.train_logs import save_logs
 
 
 @click.argument("input_data")
@@ -66,12 +39,19 @@ def main(input_data: Union[str, os.PathLike], text_preprocessing: bool):
 
     # Load data and extract only text from tagesanzeiger
     print("Load and preprocess text")
+    newspaper = "tagesanzeiger"
+    lang = "de"
+    topic = "Wissen"
+    remove_duplicates = True
+    min_num_words = 3
     tl = TextLoader(input_data)
     df_de = tl.load_text_csv(
-        newspaper="tagesanzeiger",
+        newspaper=newspaper,
+        lang=lang,
+        topic=topic,
         load_subset=False,
-        remove_duplicates=True,
-        min_num_words=3,
+        remove_duplicates=remove_duplicates,
+        min_num_words=min_num_words,
     )
 
     if text_preprocessing:
@@ -133,13 +113,17 @@ def main(input_data: Union[str, os.PathLike], text_preprocessing: bool):
     callbacks = [metric_callback, checkpoint_callback, tensorboard_callback]
 
     # Fit model
-    print('Train model')
+    print("Train model")
     model.fit(
-        x=tf_train_set, validation_data=tf_validation_set, epochs=5, verbose=2, callbacks=callbacks
+        x=tf_train_set,
+        validation_data=tf_validation_set,
+        epochs=5,
+        verbose=2,
+        callbacks=callbacks,
     )
 
     # Save model
-    print('Save model')
+    print("Save model")
     path_model = (p_repo).joinpath("saved_models/" + time_stemp)
     model.save_pretrained(path_model)
     tokenizer.save_pretrained(path_model)
@@ -147,12 +131,18 @@ def main(input_data: Union[str, os.PathLike], text_preprocessing: bool):
     # Save model logs
     save_logs(
         path_repo=p_repo,
-        path=path_model,
+        path_model=path_model,
         input_data=input_data,
         text_preprocessing=text_preprocessing,
+        newspaper=newspaper,
+        lang=lang,
+        topic=topic,
+        remove_duplicates=remove_duplicates,
+        min_num_words=min_num_words,
+        model_name="BERT",
     )
 
-    print('Done')
+    print("Done")
 
 
 if __name__ == "__main__":
diff --git a/moderation_classifier/train_MNB.py b/moderation_classifier/train_MNB.py
index 5b1a5e31545d5d02147ace854474193a9dee6fb8..7f6c350e1349acff53bc12ff44857e280f08a0b5 100644
--- a/moderation_classifier/train_MNB.py
+++ b/moderation_classifier/train_MNB.py
@@ -1,92 +1,13 @@
 from sklearn.model_selection import train_test_split
-from sklearn.feature_extraction.text import TfidfVectorizer
-from sklearn.naive_bayes import MultinomialNB
-from sklearn.pipeline import Pipeline
 
 import click
-import datetime
-from joblib import dump
-from nltk.corpus import stopwords
 from pathlib import Path
-import pandas as pd
-import spacy
-
 from typing import Union
 import os
 
-from src.preprocessing_text import TextLoader, TextProcessor
-
-
-def create_pipeline():
-    """
-    Creates classification pipeline
-    """
-
-    # define preprocessor
-    tp = TextProcessor()
-
-    # define vectorizer
-    stop_words_ge = stopwords.words("german")
-    vectorizer = TfidfVectorizer(
-        stop_words=stop_words_ge, ngram_range=(1, 4), max_features=3000
-    )
-
-    # define model
-    mnb = MultinomialNB(alpha=0.1)
-
-    # set pipeline
-    pipe = Pipeline([("processor", tp), ("vectorizer", vectorizer), ("mnb", mnb)])
-
-    return pipe
-
-
-def create_path() -> Union[str, os.PathLike]:
-    """
-    Creates path to store trained model
-    """
-    if not os.path.exists("saved_models/MNB/"):
-        os.makedirs("saved_models/MNB/")
-
-    timestemp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
-
-    return Path("saved_models/MNB/" + timestemp + ".joblib")
-
-
-def save_model(pipe: Pipeline, path):
-    """
-    Saves trained model
-    :param pipe: Trained pipeline
-    """
-    dump(pipe, path)
-
-
-def save_logs(
-    path_repo: Union[str, os.PathLike],
-    path: Union[str, os.PathLike],
-    input_data: Union[str, os.PathLike],
-    text_preprocessing: bool,
-    val_score: float,
-):
-    """
-    Saves training logs whch can be used during evaluation
-    :param path_repo: Path to repository
-    :param path: Path to trained model
-    :param input_data: Path to used train data
-    :param text_preprocessing: Boolean flag whether preprocessing was used or not
-    """
-    logs = dict()
-    logs["path_model"] = path
-    logs["input_data"] = input_data
-    logs["text_preprocessing"] = text_preprocessing
-    logs["val_score"] = val_score
-
-    path_logs = (path_repo).joinpath("saved_models/MNB_logs/")
-    if not os.path.exists(path_logs):
-        os.makedirs(path_logs)
-
-    df_logs = pd.DataFrame.from_dict(logs, orient="index", columns=["logs"])
-
-    df_logs.to_csv(path_logs.joinpath(path.stem).with_suffix(".csv"))
+from src.MNB_utils import create_pipeline, create_path, save_model
+from src.preprocessing_text import TextLoader
+from src.train_logs import save_logs
 
 
 @click.argument("input_data")
@@ -102,12 +23,16 @@ def main(input_data: Union[str, os.PathLike]):
 
     # Load data and extract only text from tagesanzeiger
     print("Load and preprocess text")
+    newspaper = "tagesanzeiger"
+    lang = "de"
+    topic = "Wissen"
     remove_duplicates = True
     min_num_words = 3
     tl = TextLoader(input_data)
     df_de = tl.load_text_csv(
-        newspaper="tagesanzeiger",
-        lang='de',
+        newspaper=newspaper,
+        lang=lang,
+        topic=topic,
         load_subset=False,
         remove_duplicates=remove_duplicates,
         min_num_words=min_num_words,
     )
@@ -130,9 +55,15 @@ def main(input_data: Union[str, os.PathLike]):
     save_model(pipe, path)
     save_logs(
         path_repo=p_repo,
-        path=path,
+        path_model=path,
         input_data=input_data,
         text_preprocessing=True,
+        newspaper=newspaper,
+        lang=lang,
+        topic=topic,
+        remove_duplicates=remove_duplicates,
+        min_num_words=min_num_words,
+        model_name="MNB",
         val_score=val_score,
     )
diff --git a/src/BERT_utils.py b/src/BERT_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..728c40b6b92558705b75adaf75a0c8b31f31690a
--- /dev/null
+++ b/src/BERT_utils.py
@@ -0,0 +1,39 @@
+import numpy as np
+import tensorflow as tf
+from tqdm import tqdm
+
+from typing import List
+
+
+def split_batches(text: np.ndarray, batch_size: int = 100) -> List[List[str]]:
+    """
+    Splits an array of comments into batches
+    :param text: Array containing comments
+    :param batch_size: Number of comments per batch
+    """
+    text_list = list(text)
+    text_batches = [
+        text_list[idx : idx + batch_size]
+        for idx in range(0, len(text_list), batch_size)
+    ]
+    return text_batches
+
+
+def predict_batches(text: np.ndarray, model, tokenizer) -> np.ndarray:
+    """
+    Makes a prediction for every batch and concatenates the results
+    :param text: Array containing comments
+    :param model: Trained TF sequence-classification model
+    :param tokenizer: Tokenizer matching the model
+    """
+    text_batches = split_batches(text)
+    y_pred_all = []
+    for batch in tqdm(text_batches):
+        # Tokenize the batch and take the argmax over the class logits
+        inputs = tokenizer(batch, return_tensors="tf", padding=True, truncation=True)
+        logits = model(**inputs).logits
+        y_pred_batch = tf.argmax(logits, axis=1)
+        y_pred_all.append(y_pred_batch)
+    y_pred_all = np.concatenate(y_pred_all)
+
+    return y_pred_all
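Usage sketch for the new prediction helpers: this mirrors how eval_BERT.py calls predict_batches after the refactor; the checkpoint path below is hypothetical.

    from transformers import AutoTokenizer, TFAutoModelForSequenceClassification
    from src.BERT_utils import predict_batches

    tokenizer = AutoTokenizer.from_pretrained("bert-base-german-cased")
    model = TFAutoModelForSequenceClassification.from_pretrained(
        "saved_models/20230101-120000"  # hypothetical fine-tuned checkpoint
    )
    # One predicted class id per comment, concatenated over all batches
    y_pred = predict_batches(df_de.text.values, model, tokenizer)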
timestemp + ".joblib") + + +def save_model(pipe: Pipeline, path): + """ + Saves trained model + :param pipe: Trained pipeline + """ + dump(pipe, path) + + +def load_model(path: Union[str, os.PathLike]) -> Pipeline: + """ + Loads trained model + :param path: Path to pipeline + """ + pipe = load(path) + + return pipe diff --git a/src/eval_utils.py b/src/eval_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..6ac84a1eecb3ab9dbee79581239dce9c8b112569 --- /dev/null +++ b/src/eval_utils.py @@ -0,0 +1,15 @@ +def gen_scores_dict(precision: float, recall: float, f1: float, accuracy: float): + """ + Generates dictionary containing most important scores + :param precision: Precision score + :param recall: Recall score + :param f1: F1 score + :param accuracy: Accuracy score + """ + results = dict() + results["precision"] = precision + results["recall"] = recall + results["f1"] = f1 + results["accuracy"] = accuracy + + return results diff --git a/src/prepare_bert_tf.py b/src/prepare_bert_tf.py index 0b96b5ab950b37af2f9a7e36f6cd0e13ef151fe2..e5e315f4de03b22afb16694fb4f3d797f4a5e0de 100644 --- a/src/prepare_bert_tf.py +++ b/src/prepare_bert_tf.py @@ -13,7 +13,7 @@ def df2dict(df: pd.DataFrame, test_size: float = 0.2, split_data: bool = True): :param test_size: size of test set :param split_data: whether data should be split or not """ - #df = df.sample(200) + #df.sample(10000, replace=True) if split_data: train, test = train_test_split(df, test_size=test_size) diff --git a/src/preprocessing_text.py b/src/preprocessing_text.py index 0829b04a28bd3697e6dafda6d9438c2eb0de52d9..aaa2491faad70d0a71c5f0241e6deaee04bfc50e 100644 --- a/src/preprocessing_text.py +++ b/src/preprocessing_text.py @@ -35,6 +35,7 @@ class TextLoader(object): self, newspaper: str = None, lang: str = None, + topic: str = None, load_subset: bool = False, remove_duplicates: bool = False, min_num_words: int = None, @@ -63,7 +64,7 @@ class TextLoader(object): df = df.rename(columns={"rejected": "label"}) df_filter = self.filter_df( - df, min_num_words, remove_duplicates, newspaper, lang + df, min_num_words, remove_duplicates, newspaper, lang, topic, ) return df_filter @@ -75,6 +76,7 @@ class TextLoader(object): remove_duplicates: bool, newspaper: str, lang: str, + topic: str, ) -> pd.DataFrame: """ Filters data depending on given arguments. 
diff --git a/src/prepare_bert_tf.py b/src/prepare_bert_tf.py
index 0b96b5ab950b37af2f9a7e36f6cd0e13ef151fe2..e5e315f4de03b22afb16694fb4f3d797f4a5e0de 100644
--- a/src/prepare_bert_tf.py
+++ b/src/prepare_bert_tf.py
@@ -13,7 +13,6 @@ def df2dict(df: pd.DataFrame, test_size: float = 0.2, split_data: bool = True):
     :param test_size: size of test set
     :param split_data: whether data should be split or not
     """
-    #df = df.sample(200)
 
     if split_data:
         train, test = train_test_split(df, test_size=test_size)
diff --git a/src/preprocessing_text.py b/src/preprocessing_text.py
index 0829b04a28bd3697e6dafda6d9438c2eb0de52d9..aaa2491faad70d0a71c5f0241e6deaee04bfc50e 100644
--- a/src/preprocessing_text.py
+++ b/src/preprocessing_text.py
@@ -35,6 +35,7 @@ class TextLoader(object):
         self,
         newspaper: str = None,
         lang: str = None,
+        topic: str = None,
         load_subset: bool = False,
         remove_duplicates: bool = False,
         min_num_words: int = None,
@@ -63,7 +64,7 @@ class TextLoader(object):
         df = df.rename(columns={"rejected": "label"})
 
         df_filter = self.filter_df(
-            df, min_num_words, remove_duplicates, newspaper, lang
+            df, min_num_words, remove_duplicates, newspaper, lang, topic,
         )
 
         return df_filter
@@ -75,6 +76,7 @@ class TextLoader(object):
         remove_duplicates: bool,
         newspaper: str,
         lang: str,
+        topic: str,
     ) -> pd.DataFrame:
         """
         Filters data depending on given arguments.
@@ -94,6 +96,9 @@ class TextLoader(object):
         if lang:
             df = self.filter_language(df, lang=lang)
 
+        if topic:
+            df = self.filter_topic(df, topic=topic)
+
         if remove_duplicates:
             df = self.remove_duplicate_comments(df)
 
@@ -116,6 +121,14 @@ class TextLoader(object):
         :param lang: Language
         """
         return df.loc[(df.language == lang)]
+
+    def filter_topic(self, df: pd.DataFrame, topic: str):
+        """
+        Keeps only comments with the given topic
+        :param df: Input dataframe
+        :param topic: Topic
+        """
+        return df.loc[(df.topic == topic)]
 
     def filter_min_words(self, df: pd.DataFrame, min_words: int = 3):
         """Filters out comments with less than min words
@@ -146,12 +159,9 @@ class TextLoader(object):
             np.where(np.array(list(c_comm.values())) > 1)
         ]
 
         indices_repetitions = np.concatenate(
             [
-            np.where(df.text == d)[0][
-                np.argsort(df.createdAt.iloc[np.where(df.text == d)[0]].values)[:-1]
-            ]
-            for d in tqdm(duplicate_comments)
+                np.where(df.text == d)[0] for d in tqdm(duplicate_comments)
             ]
         )
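Caller-side sketch of the new topic filter, mirroring how the updated train/eval scripts invoke TextLoader (the CSV path is a placeholder):

    from src.preprocessing_text import TextLoader

    tl = TextLoader("data/comments_train.csv")  # placeholder path
    df_de = tl.load_text_csv(
        newspaper="tagesanzeiger",
        lang="de",
        topic="Wissen",  # new argument: keep only comments on this topic
        load_subset=False,
        remove_duplicates=True,
        min_num_words=3,
    )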
df.loc["text_preprocessing"].values[0] + newspaper = df.loc["newspaper"].values[0] + lang = df.loc["lang"].values[0] + topic = df.loc["topic"].values[0] + remove_duplicates = df.loc["remove_duplicates"].values[0] + min_num_words = df.loc["min_num_words"].values[0] + + return ( + path_repo, + path_model, + input_data, + text_preprocessing, + newspaper, + lang, + topic, + remove_duplicates, + min_num_words, + )