From c70cbc7141a768e7b7024e4577eb46f24e94a9f2 Mon Sep 17 00:00:00 2001
From: Franziska Oschmann <franziskaoschmann@staff-net-oct-dock-1-a-dhcp-100.intern.ethz.ch>
Date: Thu, 13 Jul 2023 11:26:02 +0200
Subject: [PATCH] Fix issue with F1 computing

---
 moderation_classifier/eval_MNB.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/moderation_classifier/eval_MNB.py b/moderation_classifier/eval_MNB.py
index 154ae18..7867c33 100644
--- a/moderation_classifier/eval_MNB.py
+++ b/moderation_classifier/eval_MNB.py
@@ -30,7 +30,7 @@ def main(train_logs: Union[str, os.PathLike]):
 
     # Load logs
     df = pd.read_csv(train_logs, index_col="Unnamed: 0")
-    path_model = df.loc["path"].values[0]
+    path_model = df.loc["path_model"].values[0]
     input_data = df.loc["input_data"].values[0].replace("train", "test")
 
     # Load model
@@ -40,6 +40,7 @@ def main(train_logs: Union[str, os.PathLike]):
     tl = TextLoader(input_data)
     df_test = tl.load_text_csv(
         newspaper="tagesanzeiger",
+        lang='de',
         load_subset=False,
         remove_duplicates=False,
         min_num_words=3,
@@ -51,17 +52,16 @@ def main(train_logs: Union[str, os.PathLike]):
     y_pred = pipe.predict(X_test)
     y_pred_t = pipe.predict(X_test)
 
-    precision, recall, *_ = precision_recall_fscore_support(
+    precision, recall, f1, _ = precision_recall_fscore_support(
         y_test, y_pred, average="weighted"
     )
-    f1 = f1_score(y_test, y_pred)
-    score = pipe.score(X_test, y_test)
+    accuracy = pipe.score(X_test, y_test)
 
     results_all = dict()
     results_all["precision"] = precision
    results_all["recall"] = recall
     results_all["f1"] = f1
-    results_all["score"] = score
+    results_all["accuracy"] = accuracy
 
     #import pdb; pdb.set_trace()
 
@@ -75,10 +75,10 @@ def main(train_logs: Union[str, os.PathLike]):
         y_test_t = df_test[df_test.topic == t].label
         y_pred_t = pipe.predict(X_test_t)
 
-        precision, recall, *_ = precision_recall_fscore_support(
+        precision, recall, f1, _ = precision_recall_fscore_support(
             y_test_t, y_pred_t, average="weighted"
         )
-        f1 = f1_score(y_test_t, y_pred_t)
+        #f1 = f1_score(y_test_t, y_pred_t)
         accuracy = pipe.score(X_test_t, y_test_t)
         results_t[t] = dict()
         results_t[t]["accuracy"] = accuracy
-- 
GitLab