Commit af05060b authored by Franziska Oschmann

Adjust averaging of precision, recall, f1

parent 9fcfed88
1 merge request: !2 Dev train models
@@ -49,16 +49,16 @@ def main(train_logs: Union[str, os.PathLike]):
     text_proc = tp.fit_transform(df_de.text)
     df_de.text = text_proc
     comon_topics = tl.get_comments_per_topic(df_de)

     # Load tokenizer and model
     tokenizer = AutoTokenizer.from_pretrained("bert-base-german-cased")
     model = TFAutoModelForSequenceClassification.from_pretrained(pretrained_model_name_or_path=path_model)

     # Split text into batches
     text_list = list(df_de.text.values)
-    n = 500
+    n = 100
     results=[text_list[idx:idx+n] for idx in range(0, len(text_list), n)]
+    import pdb; pdb.set_trace()

     # eval all
     y_pred_all = []
     for batch in tqdm(results):
@@ -70,7 +70,7 @@ def main(train_logs: Union[str, os.PathLike]):
     y_pred_all = np.concatenate(y_pred_all)
     precision, recall, f1, _ = precision_recall_fscore_support(
-        df_de.label, y_pred_all, average="weighted"
+        df_de.label, y_pred_all, average="binary", pos_label=0
     )
     accuracy = accuracy_score(df_de.label, y_pred_all)
@@ -89,7 +89,7 @@ def main(train_logs: Union[str, os.PathLike]):
     y_pred_t = y_pred_all[df_de.topic == t]
     precision, recall, f1, _ = precision_recall_fscore_support(
-        y_test_t, y_pred_t, average="weighted"
+        y_test_t, y_pred_t, average="binary", pos_label=0
     )
     accuracy = accuracy_score(y_test_t, y_pred_t)
     results_t[t] = dict()
...
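The change above replaces the support-weighted average with per-class scores for label 0: in scikit-learn's precision_recall_fscore_support, average="weighted" averages precision, recall and F1 over all classes weighted by each class's support, while average="binary" with pos_label=0 reports the scores of class 0 alone. A minimal sketch with made-up labels (not from this repository) illustrates the difference:

from sklearn.metrics import precision_recall_fscore_support

# Toy labels, for illustration only
y_true = [0, 0, 0, 1, 1]
y_pred = [0, 0, 1, 1, 1]

# Old behaviour: per-class scores averaged, weighted by class support
p_w, r_w, f_w, _ = precision_recall_fscore_support(y_true, y_pred, average="weighted")

# New behaviour: precision/recall/F1 of class 0, treated as the positive class
p_b, r_b, f_b, _ = precision_recall_fscore_support(y_true, y_pred, average="binary", pos_label=0)

print(p_w, r_w, f_w)  # averaged over classes 0 and 1
print(p_b, r_b, f_b)  # class 0 only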
@@ -45,6 +45,7 @@ def main(train_logs: Union[str, os.PathLike]):
         remove_duplicates=False,
         min_num_words=3,
     )
+
     X_test = df_test.text
     y_test = df_test.label
@@ -53,7 +54,7 @@ def main(train_logs: Union[str, os.PathLike]):
     y_pred_t = pipe.predict(X_test)
     precision, recall, f1, _ = precision_recall_fscore_support(
-        y_test, y_pred, average="weighted"
+        y_test, y_pred, average='weighted'
     )
     accuracy = pipe.score(X_test, y_test)
@@ -76,7 +77,7 @@ def main(train_logs: Union[str, os.PathLike]):
     y_pred_t = pipe.predict(X_test_t)
     precision, recall, f1, _ = precision_recall_fscore_support(
-        y_test_t, y_pred_t, average="weighted"
+        y_test_t, y_pred_t, average='weighted'
     )
     #f1 = f1_score(y_test_t, y_pred_t)
     accuracy = pipe.score(X_test_t, y_test_t)
...
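The hunks in the first file cut off the batched evaluation loop that fills y_pred_all (the @@ -70 hunk begins just after it). Below is a rough, self-contained sketch of how such batched inference typically looks with these transformers classes; the model path, the example texts, and the loop body are assumptions for illustration, not code from the commit.

import numpy as np
from tqdm import tqdm
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification

tokenizer = AutoTokenizer.from_pretrained("bert-base-german-cased")
model = TFAutoModelForSequenceClassification.from_pretrained("path/to/finetuned-model")  # placeholder path

texts = ["Das ist ein Kommentar.", "Noch ein Beispieltext."]  # stand-in for df_de.text
n = 100  # batch size after this commit
batches = [texts[idx:idx + n] for idx in range(0, len(texts), n)]

y_pred_all = []
for batch in tqdm(batches):
    # Tokenize the batch, run the classifier, take the argmax over logits as the predicted label
    enc = tokenizer(batch, padding=True, truncation=True, return_tensors="tf")
    logits = model(enc).logits
    y_pred_all.append(np.argmax(logits, axis=1))
y_pred_all = np.concatenate(y_pred_all)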