Skip to content
Snippets Groups Projects
Commit 2cd7c834 authored by Franziska Oschmann
Browse files

Smaller changes in train scripts

parent 6728a32a
No related branches found
No related tags found
1 merge request: !2 "Dev train models"
...@@ -32,6 +32,7 @@ def save_logs( ...@@ -32,6 +32,7 @@ def save_logs(
:param text_preprocessing: Boolean flag whether preprocessing was used or not :param text_preprocessing: Boolean flag whether preprocessing was used or not
""" """
logs = dict() logs = dict()
logs["path_repo"] = path_repo
logs["path_model"] = path logs["path_model"] = path
logs["input_data"] = input_data logs["input_data"] = input_data
logs["text_preprocessing"] = text_preprocessing logs["text_preprocessing"] = text_preprocessing
......
...@@ -32,7 +32,7 @@ def create_pipeline(): ...@@ -32,7 +32,7 @@ def create_pipeline():
) )
# define model # define model
mnb = MultinomialNB(alpha=0.01) mnb = MultinomialNB(alpha=0.1)
# set pipeline # set pipeline
pipe = Pipeline([("processor", tp), ("vectorizer", vectorizer), ("mnb", mnb)]) pipe = Pipeline([("processor", tp), ("vectorizer", vectorizer), ("mnb", mnb)])
...@@ -102,16 +102,16 @@ def main(input_data: Union[str, os.PathLike]): ...@@ -102,16 +102,16 @@ def main(input_data: Union[str, os.PathLike]):
# Load data and extract only text from tagesanzeiger # Load data and extract only text from tagesanzeiger
print("Load and preprocess text") print("Load and preprocess text")
remove_duplicates = False remove_duplicates = True
min_num_words = 3 min_num_words = 3
tl = TextLoader(input_data) tl = TextLoader(input_data)
df_de = tl.load_text_csv( df_de = tl.load_text_csv(
newspaper="tagesanzeiger", newspaper="tagesanzeiger",
lang='de',
load_subset=False, load_subset=False,
remove_duplicates=remove_duplicates, remove_duplicates=remove_duplicates,
min_num_words=min_num_words, min_num_words=min_num_words,
) )
df_de = df_de.sample(50)
# Prepare data for modeling # Prepare data for modeling
text = df_de.text text = df_de.text
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment