Skip to content
Snippets Groups Projects
Commit 2cd7c834 authored by Franziska Oschmann
Browse files

Smaller changes in train scripts

parent 6728a32a
No related branches found
No related tags found
1 merge request: !2 Dev train models
......@@ -32,6 +32,7 @@ def save_logs(
:param text_preprocessing: Boolean flag whether preprocessing was used or not
"""
logs = dict()
logs["path_repo"] = path_repo
logs["path_model"] = path
logs["input_data"] = input_data
logs["text_preprocessing"] = text_preprocessing
......
......@@ -32,7 +32,7 @@ def create_pipeline():
)
# define model
mnb = MultinomialNB(alpha=0.01)
mnb = MultinomialNB(alpha=0.1)
# set pipeline
pipe = Pipeline([("processor", tp), ("vectorizer", vectorizer), ("mnb", mnb)])
......@@ -102,16 +102,16 @@ def main(input_data: Union[str, os.PathLike]):
# Load data and extract only text from tagesanzeiger
print("Load and preprocess text")
remove_duplicates = False
remove_duplicates = True
min_num_words = 3
tl = TextLoader(input_data)
df_de = tl.load_text_csv(
newspaper="tagesanzeiger",
lang='de',
load_subset=False,
remove_duplicates=remove_duplicates,
min_num_words=min_num_words,
)
df_de = df_de.sample(50)
# Prepare data for modeling
text = df_de.text
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment