diff --git a/moderation_classifier/eval_BERT.py b/moderation_classifier/eval_BERT.py
index b72a9d5ca7328771bcdd5ae2429cd25e9385bab7..3868d7f70dcae9f7c5f76834f49455f67ed4172e 100644
--- a/moderation_classifier/eval_BERT.py
+++ b/moderation_classifier/eval_BERT.py
@@ -49,16 +49,17 @@ def main(train_logs: Union[str, os.PathLike]):
         text_proc = tp.fit_transform(df_de.text)
         df_de.text = text_proc
     comon_topics = tl.get_comments_per_topic(df_de)
 
     # Load tokenizer and model
     tokenizer = AutoTokenizer.from_pretrained("bert-base-german-cased")
     model = TFAutoModelForSequenceClassification.from_pretrained(pretrained_model_name_or_path=path_model)
     
     # Split text into batches
     text_list = list(df_de.text.values)
-    n = 500
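+    # smaller inference batches, presumably to keep per-batch memory usage low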
+    n = 100
     results=[text_list[idx:idx+n] for idx in range(0, len(text_list), n)]
 
     # eval all
     y_pred_all = []
     for batch in tqdm(results):
@@ -70,7 +71,8 @@ def main(train_logs: Union[str, os.PathLike]):
     y_pred_all = np.concatenate(y_pred_all)
 
     precision, recall, f1, _ = precision_recall_fscore_support(
-        df_de.label, y_pred_all, average="weighted"
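+        # average="binary" with pos_label=0: precision/recall/F1 reported for class 0 only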
+        df_de.label, y_pred_all, average="binary", pos_label=0
     )
     accuracy = accuracy_score(df_de.label, y_pred_all)
 
@@ -89,7 +91,8 @@ def main(train_logs: Union[str, os.PathLike]):
         y_pred_t = y_pred_all[df_de.topic == t]
 
         precision, recall, f1, _ = precision_recall_fscore_support(
-            y_test_t, y_pred_t, average="weighted"
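+            # same binary metric per topic: class 0 treated as the positive label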
+            y_test_t, y_pred_t, average="binary", pos_label=0
         )
         accuracy = accuracy_score(y_test_t, y_pred_t)
         results_t[t] = dict()
diff --git a/moderation_classifier/eval_MNB.py b/moderation_classifier/eval_MNB.py
index 7867c33c1a3b6b6fa4605adfbda2f7b248dbe22a..622e12d2eedceb6974efe68748d4d39d6b1fd4c1 100644
--- a/moderation_classifier/eval_MNB.py
+++ b/moderation_classifier/eval_MNB.py
@@ -45,6 +45,7 @@ def main(train_logs: Union[str, os.PathLike]):
         remove_duplicates=False,
         min_num_words=3,
     )
+
     X_test = df_test.text
     y_test = df_test.label
 
@@ -53,7 +54,8 @@ def main(train_logs: Union[str, os.PathLike]):
 
     y_pred_t = pipe.predict(X_test)
     precision, recall, f1, _ = precision_recall_fscore_support(
-        y_test, y_pred, average="weighted"
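+        # weighted average: per-class precision/recall/F1 weighted by class support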
+        y_test, y_pred_t, average='weighted'
     )
     accuracy = pipe.score(X_test, y_test)
 
@@ -76,7 +78,7 @@ def main(train_logs: Union[str, os.PathLike]):
 
         y_pred_t = pipe.predict(X_test_t)
         precision, recall, f1, _ = precision_recall_fscore_support(
-            y_test_t, y_pred_t, average="weighted"
+            y_test_t, y_pred_t, average='weighted'
         )
         #f1 = f1_score(y_test_t, y_pred_t)
         accuracy = pipe.score(X_test_t, y_test_t)