Skip to content
Snippets Groups Projects
Commit 9fcfed88 authored by Franziska Oschmann
Browse files

Only return most common topics

parent c70cbc71
No related branches found
No related tags found
1 merge request: !2 "Dev train models"
...@@ -50,12 +50,7 @@ def main(train_logs: Union[str, os.PathLike]): ...@@ -50,12 +50,7 @@ def main(train_logs: Union[str, os.PathLike]):
df_de.text = text_proc df_de.text = text_proc
comon_topics = tl.get_comments_per_topic(df_de) comon_topics = tl.get_comments_per_topic(df_de)
df_de = df_de[:10000]
# Load tokenizer and model # Load tokenizer and model
start = timeit.timeit()
tokenizer = AutoTokenizer.from_pretrained("bert-base-german-cased") tokenizer = AutoTokenizer.from_pretrained("bert-base-german-cased")
model = TFAutoModelForSequenceClassification.from_pretrained(pretrained_model_name_or_path=path_model) model = TFAutoModelForSequenceClassification.from_pretrained(pretrained_model_name_or_path=path_model)
...@@ -102,9 +97,6 @@ def main(train_logs: Union[str, os.PathLike]): ...@@ -102,9 +97,6 @@ def main(train_logs: Union[str, os.PathLike]):
results_t[t]["f1"] = f1 results_t[t]["f1"] = f1
results_t[t]["precision"] = precision results_t[t]["precision"] = precision
results_t[t]["recall"] = recall results_t[t]["recall"] = recall
end = timeit.timeit()
print(end - start)
# Compute rejection rate # Compute rejection rate
reject_rate_all = np.round(df_de.label.mean(), 4) * 100 reject_rate_all = np.round(df_de.label.mean(), 4) * 100
......
...@@ -133,12 +133,8 @@ class TextLoader(object): ...@@ -133,12 +133,8 @@ class TextLoader(object):
#df = df.rename(columns={"rejected": "label"}) #df = df.rename(columns={"rejected": "label"})
topics = Counter(df["topic"]).most_common(num_topic) topics = Counter(df["topic"]).most_common(num_topic)
comm_per_topic = dict()
for t in topics:
df_topic = df[df.topic == t[0]]
comm_per_topic[t[0]] = df_topic
return comm_per_topic return topics
def find_duplicate_comments(self, df: pd.DataFrame) -> np.ndarray: def find_duplicate_comments(self, df: pd.DataFrame) -> np.ndarray:
""" " """ "
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment