Skip to content
Snippets Groups Projects
Commit af05060b authored by Franziska Oschmann
Browse files

Adjust averaging of precision, recall, f1

parent 9fcfed88
No related branches found
No related tags found
1 merge request: !2 "Dev train models"
......@@ -49,16 +49,16 @@ def main(train_logs: Union[str, os.PathLike]):
text_proc = tp.fit_transform(df_de.text)
df_de.text = text_proc
comon_topics = tl.get_comments_per_topic(df_de)
# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("bert-base-german-cased")
model = TFAutoModelForSequenceClassification.from_pretrained(pretrained_model_name_or_path=path_model)
# Split text into batches
text_list = list(df_de.text.values)
n = 500
n = 100
results=[text_list[idx:idx+n] for idx in range(0, len(text_list), n)]
import pdb; pdb.set_trace()
# eval all
y_pred_all = []
for batch in tqdm(results):
......@@ -70,7 +70,7 @@ def main(train_logs: Union[str, os.PathLike]):
y_pred_all = np.concatenate(y_pred_all)
precision, recall, f1, _ = precision_recall_fscore_support(
df_de.label, y_pred_all, average="weighted"
df_de.label, y_pred_all, average="binary", pos_label=0
)
accuracy = accuracy_score(df_de.label, y_pred_all)
......@@ -89,7 +89,7 @@ def main(train_logs: Union[str, os.PathLike]):
y_pred_t = y_pred_all[df_de.topic == t]
precision, recall, f1, _ = precision_recall_fscore_support(
y_test_t, y_pred_t, average="weighted"
y_test_t, y_pred_t, average="binary", pos_label=0
)
accuracy = accuracy_score(y_test_t, y_pred_t)
results_t[t] = dict()
......
......@@ -45,6 +45,7 @@ def main(train_logs: Union[str, os.PathLike]):
remove_duplicates=False,
min_num_words=3,
)
X_test = df_test.text
y_test = df_test.label
......@@ -53,7 +54,7 @@ def main(train_logs: Union[str, os.PathLike]):
y_pred_t = pipe.predict(X_test)
precision, recall, f1, _ = precision_recall_fscore_support(
y_test, y_pred, average="weighted"
y_test, y_pred, average='weighted'
)
accuracy = pipe.score(X_test, y_test)
......@@ -76,7 +77,7 @@ def main(train_logs: Union[str, os.PathLike]):
y_pred_t = pipe.predict(X_test_t)
precision, recall, f1, _ = precision_recall_fscore_support(
y_test_t, y_pred_t, average="weighted"
y_test_t, y_pred_t, average='weighted'
)
#f1 = f1_score(y_test_t, y_pred_t)
accuracy = pipe.score(X_test_t, y_test_t)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment