Skip to content
Snippets Groups Projects
data-exploration.ipynb 133 KiB
Newer Older
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "f5fd5a7e",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import seaborn as sns\n",
    "\n",
    "from datetime import date\n",
    "from wordcloud import WordCloud \n",
    "\n",
    "from nltk.corpus import stopwords\n",
    "\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "03ed150e",
   "metadata": {},
   "outputs": [],
   "source": [
    "#!python -m spacy download en_core_web_sm"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1d4506a0",
   "metadata": {},
   "source": [
    "### Load data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "6efcb560",
   "metadata": {},
   "outputs": [
    {
     "ename": "FileNotFoundError",
     "evalue": "[Errno 2] No such file or directory: '../data/tamedia_for_classifier_v4_preproc.csv'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mFileNotFoundError\u001b[0m                         Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[11], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m df \u001b[38;5;241m=\u001b[39m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_csv\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m../data/tamedia_for_classifier_v4_preproc.csv\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m~/Documents/public_policy/pp_env/lib/python3.10/site-packages/pandas/io/parsers/readers.py:912\u001b[0m, in \u001b[0;36mread_csv\u001b[0;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)\u001b[0m\n\u001b[1;32m    899\u001b[0m kwds_defaults \u001b[38;5;241m=\u001b[39m _refine_defaults_read(\n\u001b[1;32m    900\u001b[0m     dialect,\n\u001b[1;32m    901\u001b[0m     delimiter,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    908\u001b[0m     dtype_backend\u001b[38;5;241m=\u001b[39mdtype_backend,\n\u001b[1;32m    909\u001b[0m )\n\u001b[1;32m    910\u001b[0m kwds\u001b[38;5;241m.\u001b[39mupdate(kwds_defaults)\n\u001b[0;32m--> 912\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_read\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m~/Documents/public_policy/pp_env/lib/python3.10/site-packages/pandas/io/parsers/readers.py:577\u001b[0m, in \u001b[0;36m_read\u001b[0;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[1;32m    574\u001b[0m _validate_names(kwds\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnames\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m))\n\u001b[1;32m    576\u001b[0m \u001b[38;5;66;03m# Create the parser.\u001b[39;00m\n\u001b[0;32m--> 577\u001b[0m parser \u001b[38;5;241m=\u001b[39m \u001b[43mTextFileReader\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    579\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m chunksize \u001b[38;5;129;01mor\u001b[39;00m iterator:\n\u001b[1;32m    580\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m parser\n",
      "File \u001b[0;32m~/Documents/public_policy/pp_env/lib/python3.10/site-packages/pandas/io/parsers/readers.py:1407\u001b[0m, in \u001b[0;36mTextFileReader.__init__\u001b[0;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[1;32m   1404\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhas_index_names\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m kwds[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhas_index_names\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m   1406\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles: IOHandles \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m-> 1407\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_engine \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_make_engine\u001b[49m\u001b[43m(\u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mengine\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m~/Documents/public_policy/pp_env/lib/python3.10/site-packages/pandas/io/parsers/readers.py:1661\u001b[0m, in \u001b[0;36mTextFileReader._make_engine\u001b[0;34m(self, f, engine)\u001b[0m\n\u001b[1;32m   1659\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m mode:\n\u001b[1;32m   1660\u001b[0m         mode \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m-> 1661\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles \u001b[38;5;241m=\u001b[39m \u001b[43mget_handle\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   1662\u001b[0m \u001b[43m    \u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1663\u001b[0m \u001b[43m    \u001b[49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1664\u001b[0m \u001b[43m    \u001b[49m\u001b[43mencoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mencoding\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1665\u001b[0m \u001b[43m    \u001b[49m\u001b[43mcompression\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcompression\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1666\u001b[0m \u001b[43m    \u001b[49m\u001b[43mmemory_map\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmemory_map\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1667\u001b[0m \u001b[43m    \u001b[49m\u001b[43mis_text\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mis_text\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1668\u001b[0m \u001b[43m    \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mencoding_errors\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstrict\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1669\u001b[0m \u001b[43m    \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstorage_options\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1670\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   
1671\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m   1672\u001b[0m f \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles\u001b[38;5;241m.\u001b[39mhandle\n",
      "File \u001b[0;32m~/Documents/public_policy/pp_env/lib/python3.10/site-packages/pandas/io/common.py:859\u001b[0m, in \u001b[0;36mget_handle\u001b[0;34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[0m\n\u001b[1;32m    854\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(handle, \u001b[38;5;28mstr\u001b[39m):\n\u001b[1;32m    855\u001b[0m     \u001b[38;5;66;03m# Check whether the filename is to be opened in binary mode.\u001b[39;00m\n\u001b[1;32m    856\u001b[0m     \u001b[38;5;66;03m# Binary mode does not support 'encoding' and 'newline'.\u001b[39;00m\n\u001b[1;32m    857\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m ioargs\u001b[38;5;241m.\u001b[39mencoding \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m ioargs\u001b[38;5;241m.\u001b[39mmode:\n\u001b[1;32m    858\u001b[0m         \u001b[38;5;66;03m# Encoding\u001b[39;00m\n\u001b[0;32m--> 859\u001b[0m         handle \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[1;32m    860\u001b[0m \u001b[43m            \u001b[49m\u001b[43mhandle\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    861\u001b[0m \u001b[43m            \u001b[49m\u001b[43mioargs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    862\u001b[0m \u001b[43m            \u001b[49m\u001b[43mencoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mioargs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mencoding\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    863\u001b[0m \u001b[43m            \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    864\u001b[0m \u001b[43m            
\u001b[49m\u001b[43mnewline\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m    865\u001b[0m \u001b[43m        \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    866\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m    867\u001b[0m         \u001b[38;5;66;03m# Binary mode\u001b[39;00m\n\u001b[1;32m    868\u001b[0m         handle \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mopen\u001b[39m(handle, ioargs\u001b[38;5;241m.\u001b[39mmode)\n",
      "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '../data/tamedia_for_classifier_v4_preproc.csv'"
     ]
    }
   ],
   "source": [
    "df = pd.read_csv('../data/tamedia_for_classifier_v4_preproc.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "4bd7bf1c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ID</th>\n",
       "      <th>createdAt</th>\n",
       "      <th>text</th>\n",
       "      <th>rejected</th>\n",
       "      <th>state</th>\n",
       "      <th>originTenantId</th>\n",
       "      <th>replyTo</th>\n",
       "      <th>asset.risk</th>\n",
       "      <th>topic</th>\n",
       "      <th>hsprob</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>5fee66486ef49d0033d97e4c</td>\n",
       "      <td>2021-01-01T00:01:12Z</td>\n",
       "      <td>Hat schon welche, möchte aber lieber nicht erw...</td>\n",
       "      <td>1</td>\n",
       "      <td>rejected</td>\n",
       "      <td>tagesanzeiger</td>\n",
       "      <td>5f537bbdd2abdd0032ec12ad</td>\n",
       "      <td>high</td>\n",
       "      <td>Zürich</td>\n",
       "      <td>0.051257</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>5fee66b7e9b26b00322cc53e</td>\n",
       "      <td>2021-01-01T00:03:03Z</td>\n",
       "      <td>Wieso nicht? Absolut kein Argument.</td>\n",
       "      <td>1</td>\n",
       "      <td>rejected</td>\n",
       "      <td>tagesanzeiger</td>\n",
       "      <td>NaN</td>\n",
       "      <td>high</td>\n",
       "      <td>Bundeshaus</td>\n",
       "      <td>0.012496</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>5fee66bfe9b26b00322cc543</td>\n",
       "      <td>2021-01-01T00:03:11Z</td>\n",
       "      <td>Eine Impfung kostet vergleichsweise wenig. Und...</td>\n",
       "      <td>0</td>\n",
       "      <td>approved</td>\n",
       "      <td>derbund</td>\n",
       "      <td>5fee4bccb3aa6d0032c3c1f0</td>\n",
       "      <td>high</td>\n",
       "      <td>Bundeshaus</td>\n",
       "      <td>0.027282</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>5fee66dca0dd250033ef02ea</td>\n",
       "      <td>2021-01-01T00:03:40Z</td>\n",
       "      <td>Sind Sie einfach nur etwas einfach oder hochgr...</td>\n",
       "      <td>0</td>\n",
       "      <td>approved</td>\n",
       "      <td>tagesanzeiger</td>\n",
       "      <td>5fee1998e9b26b00322caaad</td>\n",
       "      <td>low</td>\n",
       "      <td>Meinungen</td>\n",
       "      <td>0.020309</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5fee66ec6ef49d0033d97e7e</td>\n",
       "      <td>2021-01-01T00:03:56Z</td>\n",
       "      <td>Hä??? Von welchem Paralleluniversum ist hier m...</td>\n",
       "      <td>1</td>\n",
       "      <td>rejected</td>\n",
       "      <td>tagesanzeiger</td>\n",
       "      <td>5fedfcbdf31d260033d38738</td>\n",
       "      <td>low</td>\n",
       "      <td>Schweiz</td>\n",
       "      <td>0.018285</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                         ID             createdAt   \n",
       "0  5fee66486ef49d0033d97e4c  2021-01-01T00:01:12Z  \\\n",
       "1  5fee66b7e9b26b00322cc53e  2021-01-01T00:03:03Z   \n",
       "2  5fee66bfe9b26b00322cc543  2021-01-01T00:03:11Z   \n",
       "3  5fee66dca0dd250033ef02ea  2021-01-01T00:03:40Z   \n",
       "4  5fee66ec6ef49d0033d97e7e  2021-01-01T00:03:56Z   \n",
       "\n",
       "                                                text  rejected     state   \n",
       "0  Hat schon welche, möchte aber lieber nicht erw...         1  rejected  \\\n",
       "1                Wieso nicht? Absolut kein Argument.         1  rejected   \n",
       "2  Eine Impfung kostet vergleichsweise wenig. Und...         0  approved   \n",
       "3  Sind Sie einfach nur etwas einfach oder hochgr...         0  approved   \n",
       "4  Hä??? Von welchem Paralleluniversum ist hier m...         1  rejected   \n",
       "\n",
       "  originTenantId                   replyTo asset.risk       topic    hsprob  \n",
       "0  tagesanzeiger  5f537bbdd2abdd0032ec12ad       high      Zürich  0.051257  \n",
       "1  tagesanzeiger                       NaN       high  Bundeshaus  0.012496  \n",
       "2        derbund  5fee4bccb3aa6d0032c3c1f0       high  Bundeshaus  0.027282  \n",
       "3  tagesanzeiger  5fee1998e9b26b00322caaad        low   Meinungen  0.020309  \n",
       "4  tagesanzeiger  5fedfcbdf31d260033d38738        low     Schweiz  0.018285  "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the first five rows to check which columns were loaded.\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1763a7a3",
   "metadata": {},
   "source": [
    "### Text"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ce3aecf4",
   "metadata": {},
   "source": [
    "#### Language"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "a002db9f",
   "metadata": {},
   "outputs": [
    {
     "ename": "KeyError",
     "evalue": "'language'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
      "File \u001b[0;32m~/Documents/public_policy/pp_env/lib/python3.10/site-packages/pandas/core/indexes/base.py:3652\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m   3651\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 3652\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_engine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcasted_key\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   3653\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n",
      "File \u001b[0;32m~/Documents/public_policy/pp_env/lib/python3.10/site-packages/pandas/_libs/index.pyx:147\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
      "File \u001b[0;32m~/Documents/public_policy/pp_env/lib/python3.10/site-packages/pandas/_libs/index.pyx:176\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
      "File \u001b[0;32mpandas/_libs/hashtable_class_helper.pxi:7080\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
      "File \u001b[0;32mpandas/_libs/hashtable_class_helper.pxi:7088\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;31mKeyError\u001b[0m: 'language'",
      "\nThe above exception was the direct cause of the following exception:\n",
      "\u001b[0;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[5], line 2\u001b[0m\n\u001b[1;32m      1\u001b[0m fig, axes \u001b[38;5;241m=\u001b[39m plt\u001b[38;5;241m.\u001b[39msubplots(\u001b[38;5;241m1\u001b[39m, \u001b[38;5;241m1\u001b[39m, figsize\u001b[38;5;241m=\u001b[39m(\u001b[38;5;241m10\u001b[39m,\u001b[38;5;241m5\u001b[39m))\n\u001b[0;32m----> 2\u001b[0m sns\u001b[38;5;241m.\u001b[39mhistplot(\u001b[43mdf\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mlanguage\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m)\n",
      "File \u001b[0;32m~/Documents/public_policy/pp_env/lib/python3.10/site-packages/pandas/core/frame.py:3761\u001b[0m, in \u001b[0;36mDataFrame.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m   3759\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcolumns\u001b[38;5;241m.\u001b[39mnlevels \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[1;32m   3760\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_getitem_multilevel(key)\n\u001b[0;32m-> 3761\u001b[0m indexer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcolumns\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   3762\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_integer(indexer):\n\u001b[1;32m   3763\u001b[0m     indexer \u001b[38;5;241m=\u001b[39m [indexer]\n",
      "File \u001b[0;32m~/Documents/public_policy/pp_env/lib/python3.10/site-packages/pandas/core/indexes/base.py:3654\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m   3652\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_engine\u001b[38;5;241m.\u001b[39mget_loc(casted_key)\n\u001b[1;32m   3653\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[0;32m-> 3654\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01merr\u001b[39;00m\n\u001b[1;32m   3655\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n\u001b[1;32m   3656\u001b[0m     \u001b[38;5;66;03m# If we have a listlike key, _check_indexing_error will raise\u001b[39;00m\n\u001b[1;32m   3657\u001b[0m     \u001b[38;5;66;03m#  InvalidIndexError. Otherwise we fall through and re-raise\u001b[39;00m\n\u001b[1;32m   3658\u001b[0m     \u001b[38;5;66;03m#  the TypeError.\u001b[39;00m\n\u001b[1;32m   3659\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_check_indexing_error(key)\n",
      "\u001b[0;31mKeyError\u001b[0m: 'language'"
     ]
    },
    {
     "data": {
      "image/png": "",
      "text/plain": [
       "<Figure size 1000x500 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Histogram of the `language` column over all loaded comments.\n",
    "# NOTE(review): the df.head() output stored above shows no `language` column and this\n",
    "# cell's saved output is a KeyError -- confirm which preprocessing step should add it.\n",
    "fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(10, 5))\n",
    "sns.histplot(df['language'], ax=axes)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a0fddd0b",
   "metadata": {},
   "source": [
    "#### Text examples"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "3bbbe6f6",
   "metadata": {},
   "outputs": [
    {
     "ename": "AttributeError",
     "evalue": "'DataFrame' object has no attribute 'language'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
      "\u001b[0;32m/var/folders/bn/hrm9f3gs76z5zb1bxxc4g_s00000gn/T/ipykernel_92361/1030998729.py\u001b[0m in \u001b[0;36m?\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf_rejected_de\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlanguage\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'de'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m&\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrejected\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      2\u001b[0m \u001b[0mdf_accepted_de\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlanguage\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'de'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m&\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrejected\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Rejected comments:'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/Documents/public_policy/pp_env/lib/python3.10/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36m?\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m   5985\u001b[0m             \u001b[0;32mand\u001b[0m \u001b[0mname\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_accessors\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   5986\u001b[0m             \u001b[0;32mand\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_info_axis\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_can_hold_identifiers_and_holds_name\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   5987\u001b[0m         ):\n\u001b[1;32m   5988\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 5989\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mobject\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__getattribute__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;31mAttributeError\u001b[0m: 'DataFrame' object has no attribute 'language'"
     ]
    }
   ],
   "source": [
    "# Split the German comments by moderation outcome; both frames are reused by later cells.\n",
    "# `rejected` is stored as 0/1 (see the df.head() output above), so compare against integers.\n",
    "is_german = df['language'] == 'de'\n",
    "df_rejected_de = df[is_german & (df['rejected'] == 1)]\n",
    "df_accepted_de = df[is_german & (df['rejected'] == 0)]\n",
    "\n",
    "# A fixed random_state keeps the printed examples stable across reruns; capping n avoids\n",
    "# the ValueError that sample() raises when fewer than 3 rows are available.\n",
    "print('Rejected comments:')\n",
    "for c in df_rejected_de['text'].sample(n=min(3, len(df_rejected_de)), random_state=42):\n",
    "    print(c)\n",
    "    print('\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "173770c7",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accepted comments:\n"
     ]
    },
    {
     "ename": "NameError",
     "evalue": "name 'df_accepted_de' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[8], line 2\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mAccepted comments:\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m----> 2\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m c \u001b[38;5;129;01min\u001b[39;00m \u001b[43mdf_accepted_de\u001b[49m\u001b[38;5;241m.\u001b[39mtext\u001b[38;5;241m.\u001b[39msample(\u001b[38;5;241m3\u001b[39m):\n\u001b[1;32m      3\u001b[0m     \u001b[38;5;28mprint\u001b[39m(c)\n\u001b[1;32m      4\u001b[0m     \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m'\u001b[39m)\n",
      "\u001b[0;31mNameError\u001b[0m: name 'df_accepted_de' is not defined"
     ]
    }
   ],
   "source": [
    "# Print up to three accepted German comments. The fixed random_state keeps the examples\n",
    "# reproducible and the cap on n avoids a ValueError when fewer than 3 rows exist.\n",
    "print('Accepted comments:')\n",
    "for c in df_accepted_de['text'].sample(n=min(3, len(df_accepted_de)), random_state=42):\n",
    "    print(c)\n",
    "    print('\\n')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5d640930",
   "metadata": {},
   "source": [
    "#### Word cloud"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "078ad528",
   "metadata": {},
   "outputs": [],
   "source": [
    "# German stop-word list from NLTK; passed to WordCloud in the word-cloud cells below.\n",
    "# NOTE(review): presumably needs the NLTK 'stopwords' corpus to be downloaded first -- confirm.\n",
    "german_stop_words = stopwords.words('german')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "484f790d",
   "metadata": {},
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'df_accepted_de' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[10], line 2\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[38;5;66;03m# Create and generate a word cloud image:\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m text_de_accepted \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;241m.\u001b[39mjoin(\u001b[43mdf_accepted_de\u001b[49m\u001b[38;5;241m.\u001b[39mtext)\n\u001b[1;32m      3\u001b[0m wordcloud \u001b[38;5;241m=\u001b[39m WordCloud(stopwords\u001b[38;5;241m=\u001b[39mgerman_stop_words)\u001b[38;5;241m.\u001b[39mgenerate(text_de_accepted)\n\u001b[1;32m      5\u001b[0m \u001b[38;5;66;03m# Display the generated image:\u001b[39;00m\n",
      "\u001b[0;31mNameError\u001b[0m: name 'df_accepted_de' is not defined"
     ]
    }
   ],
   "source": [
    "# Join every accepted German comment into one string and build its word cloud,\n",
    "# leaving out the German stop words.\n",
    "text_de_accepted = ' '.join(df_accepted_de['text'])\n",
    "wordcloud = WordCloud(stopwords=german_stop_words)\n",
    "wordcloud.generate(text_de_accepted)\n",
    "\n",
    "# Render the cloud as an image with the axes hidden.\n",
    "plt.imshow(wordcloud, interpolation='bilinear')\n",
    "plt.axis(\"off\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5cc400e0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Join every rejected German comment into one string and build its word cloud,\n",
    "# leaving out the German stop words.\n",
    "text_de_rejected = ' '.join(df_rejected_de['text'])\n",
    "wordcloud = WordCloud(stopwords=german_stop_words)\n",
    "wordcloud.generate(text_de_rejected)\n",
    "\n",
    "# Render the cloud as an image with the axes hidden.\n",
    "plt.imshow(wordcloud, interpolation='bilinear')\n",
    "plt.axis(\"off\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9489615c",
   "metadata": {},
   "source": [
    "#### Number of words per comment"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "58e91383",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Whitespace-token count of every German comment, kept as plain lists per moderation outcome.\n",
    "words_per_sent_rejected = df_rejected_de['text'].map(lambda comm: len(comm.split())).tolist()\n",
    "words_per_sent_accepted = df_accepted_de['text'].map(lambda comm: len(comm.split())).tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "00cc3793",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Side-by-side histograms of comment length (whitespace tokens) by moderation outcome.\n",
    "# Each list entry is the word count of one whole comment, so the figure is labelled per comment.\n",
    "fig, axes = plt.subplots(1, 2, figsize=(10, 5))\n",
    "fig.suptitle('Number of words per comment')\n",
    "# Same x-range on both panels so the two distributions can be compared directly;\n",
    "# the trailing semicolon suppresses the repr of the value returned by .set().\n",
    "sns.histplot(x=words_per_sent_rejected, ax=axes[0]).set(title='rejected', xlabel='words', xlim=(0, 400))\n",
    "sns.histplot(x=words_per_sent_accepted, ax=axes[1]).set(title='accepted', xlabel='words', xlim=(0, 400));"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e864b48d",
   "metadata": {},
   "source": [
    "### Moderation result (per language)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "974e3104",
   "metadata": {},
   "outputs": [
    {
     "ename": "AttributeError",
     "evalue": "'DataFrame' object has no attribute 'language'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
      "\u001b[0;32m/var/folders/bn/hrm9f3gs76z5zb1bxxc4g_s00000gn/T/ipykernel_92361/38630575.py\u001b[0m in \u001b[0;36m?\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0msns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhistplot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlanguage\u001b[0m\u001b[0;34m==\u001b[0m\u001b[0;34m'de'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrejected\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtitle\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'German comments'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;32m~/Documents/public_policy/pp_env/lib/python3.10/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36m?\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m   5985\u001b[0m             \u001b[0;32mand\u001b[0m \u001b[0mname\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_accessors\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   5986\u001b[0m             \u001b[0;32mand\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_info_axis\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_can_hold_identifiers_and_holds_name\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   5987\u001b[0m         ):\n\u001b[1;32m   5988\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 5989\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mobject\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__getattribute__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;31mAttributeError\u001b[0m: 'DataFrame' object has no attribute 'language'"
     ]
    }
   ],
   "source": [
    "sns.histplot(df[df.language=='de'].rejected).set(title='German comments')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "5f0d883e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[Text(0.5, 1.0, 'French comments')]"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAlUAAAHHCAYAAACWQK1nAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA8gElEQVR4nO3deVhV5f7//xegDCobHEES5xxzOGISlZpGoqnfTLvUBkNzaABLKU3THDtRVqYV5alOYic9lueUlZpGmFpKmhg5pBw1i0pBTWE7ArLX748+rJ87cIBuQfT5uK51Xay13ute731r7ldrr73wsCzLEgAAAP4Sz/JuAAAA4EpAqAIAADCAUAUAAGAAoQoAAMAAQhUAAIABhCoAAAADCFUAAAAGEKoAAAAMIFQBAAAYQKgCcMVq2LCh+vTpU95tALhKEKoAXJTExER5eHgUu0yYMKG820M5WLFihaZNm1bebQCXjUrl3QCAimXGjBlq1KiR27brrruunLpBeVqxYoUSEhIIVsD/IVQBKJFevXqpY8eOF1V7+vRpeXt7y9OTi+IArnz8SwfAiDVr1sjDw0OLFy/W5MmTdc0116hKlSpyOp2SpI0bN6pnz54KCAhQlSpV1LVrV61fv95tjGnTpsnDw0N79uzR0KFDFRgYqICAAA0bNkwnT54scs733ntPnTp1UpUqVVS9enV16dJFn3/+eZG6r7/+Wp06dZKvr68aN26sd99996Jek8vl0ty5c9WmTRv5+vqqdu3a6tmzpzZv3mzXnDlzRjNnzlSTJk3k4+Ojhg0b6qmnnlJubq7bWIX3d61Zs0YdO3aUn5+f2rRpozVr1kiSPvzwQ/s8YWFh+u6779yOHzp0qKpVq6aMjAz16dNH1apV0zXXXKOEhARJ0rZt29S9e3dVrVpVDRo00KJFi4q8nuzsbI0ZM0ahoaHy8fFR06ZN9fzzz8vlctk1P/30kzw8PPTiiy/qzTfftF/X9ddfr2+//datn8Jzn/1RcKHFixcrLCxM/v7+cjgcatOmjebOnXtR8w5UVIQqACWSk5Ojw4cPuy1nmzlzppYvX64nnnhCzz77rLy9vbV69Wp16dJFTqdTU6dO1bPPPqvs7Gx1795dmzZtKnKOgQMH6tixY4qPj9fAgQOVmJio6dOnu9VMnz5dQ4YMUeXKlTVjxgxNnz5doaGhWr16tVvdnj17dNddd+m2227TSy+9pOrVq2vo0KHasWPHBV/r8OHD7RDy/PPPa8KECfL19dU333xj14wYMUJTpkxRhw4d9PLLL6tr166Kj4/X4MGDi4y3Z88e3XPPPerbt6/i4+N19OhR9e3bVwsXLtTYsWN13333afr06dq7d68GDhzoFnYkqaCgQL169VJoaKhmzZqlhg0bKjY2VomJierZs6c6duyo559/Xv7+/rr//vu1b98++9iTJ0+qa9eueu+993T//ffrlVde0U033aSJEycqLi6uSK+LFi3SCy+8oAcffFDPPPOMfvrpJ/Xv31/5+fmSpAcffFC33XabJOlf//qXvUhSUlKS7r77blWvXl3PP/+8nnvuOd1yyy1FQjRwxbEA4CLMnz/fklTsYlmW9eWXX1qSrMaNG1snT560j3O5XNa1115rRUVFWS6Xy95+8uRJq1GjRtZtt91mb5s6daolyXrggQfczn3nnXdaNWvWtNd3795teXp6WnfeeadVUFDgVnv2ORo0aGBJstatW2dvO3jwoOXj42M9/vjj5329q1evtiRZjz76aJF9hedIS0uzJFkjRoxw2//EE09YkqzVq1cX6WXDhg32tlWrVlmSLD8/P+vnn3+2t//jH/+wJFlffvmlvS06OtqSZD377LP2tqNHj1p+fn6Wh4eHtXjxYnv7rl27LEnW1KlT7W0zZ860qlatav3vf/9z63XChAmWl5eXlZGRYVmWZe3bt8+SZNWsWdM6cuSIXffxxx9bkqxPP/3U3hYTE2MV
9zby2GOPWQ6Hwzpz5kyRfcCVjCtVAEokISFBSUlJbsvZoqOj5efnZ6+npaVp9+7duueee/T777/bV7dOnDihW2+9VevWrStyReahhx5yW+/cubN+//13+6PEpUuXyuVyacqUKUXu1zr7IyhJatWqlTp37myv165dW82bN9ePP/543tf53//+Vx4eHpo6dWqRfYXnWLFihSQVudLz+OOPS5KWL19epJeIiAh7PTw8XJLUvXt31a9fv8j24nocMWKE/XNgYKCaN2+uqlWrauDAgfb25s2bKzAw0O34JUuWqHPnzqpevbrbVcbIyEgVFBRo3bp1bucZNGiQqlevbq8XzuGF5q2wrxMnThT5uwFc6bhRHUCJdOrU6bw3qv/5m4G7d++W9EfYOpecnBy3N/CzA4Yke9/Ro0flcDi0d+9eeXp6qlWrVhfs989jFY539OjR8x63d+9ehYSEqEaNGues+fnnn+Xp6ammTZu6bQ8ODlZgYKB+/vnn8/YSEBAgSQoNDS12+597LLyv68+19erVKxImAwIC3I7fvXu3tm7dWuT4QgcPHjxvr2f/GVzII488og8++EC9evXSNddcox49emjgwIHq2bPnBY8FKjJCFQCjzr5KJcm+CvXCCy+offv2xR5TrVo1t3UvL69i6yzLKnE/Jsc6lz8HmpL2crE9/pXjXS6XbrvtNo0fP77Y2mbNmpWqp+LUqVNHaWlpWrVqlT777DN99tlnmj9/vu6//34tWLDggscDFRWhCsAl1aRJE0mSw+FQZGSksTFdLpd++OGHcwY1E+dYtWqVjhw5cs6rVQ0aNJDL5dLu3bvVsmVLe3tWVpays7PVoEGDS9JbaTRp0kTHjx839mcgnT9Ment7q2/fvurbt69cLpceeeQR/eMf/9DTTz9d5MoecKXgnioAl1RYWJiaNGmiF198UcePHy+y/9ChQyUes1+/fvL09NSMGTOK3I9l6grUgAEDZFlWkW8dnn2O22+/XZI0Z84ct/2zZ8+WJPXu3dtILyYMHDhQKSkpWrVqVZF92dnZOnPmTInHrFq1qn382X7//Xe3dU9PT7Vt21aSijxqAriScKUKwCXl6empt99+W7169VLr1q01bNgwXXPNNfrtt9/05ZdfyuFw6NNPPy3RmE2bNtWkSZM0c+ZMde7cWf3795ePj4++/fZbhYSEKD4+/i/33a1bNw0ZMkSvvPKKdu/erZ49e8rlcumrr75St27dFBsbq3bt2ik6OlpvvvmmsrOz1bVrV23atEkLFixQv3791K1bt7/chynjxo3TJ598oj59+mjo0KEKCwvTiRMntG3bNv3nP//RTz/9pFq1apVozLCwMEnSo48+qqioKHl5eWnw4MEaMWKEjhw5ou7du6tevXr6+eef9eqrr6p9+/ZuV/SAKw2hCsAld8sttyglJUUzZ87Ua6+9puPHjys4OFjh4eF68MEHSzVm4a/LefXVVzVp0iRVqVJFbdu21ZAhQ4z1PX/+fLVt21b//Oc/NW7cOAUEBKhjx4668cYb7Zq3335bjRs3VmJioj766CMFBwdr4sSJxX5rsDxVqVJFa9eu1bPPPqslS5bo3XfflcPhULNmzTR9+nT75viS6N+/v0aPHq3Fixfrvffek2VZGjx4sO677z69+eabev3115Wdna3g4GANGjRI06ZN4+n6uKJ5WCbv1gQAALhK8b8MAAAABhCqAAAADCBUAQAAGECoAgAAMIBQBQAAYAChCgAAwACeU1WGXC6X9u/fL39//4v+XWEAAKB8WZalY8eOKSQk5LzPWiNUlaH9+/cX+W30AACgYvjll19Ur169c+4nVJUhf39/SX/8oTgcjnLuBgAAXAyn06nQ0FD7ffxcCFVlqPAjP4fDQagCAKCCudCtO9yoDgAAYAChCgAAwABCFQAAgAGEKgAAAAMIVQAAAAYQqgAAAAwgVAEAABhAqAIAADCAUAUAAGAAoQoAAMAAQhUAAIABhCoAAAADCFUA
AAAGEKoAAAAMqFTeDcCMjIwMHT58+JKMXatWLdWvX/+SjA0AwJWCUHUFyMjIUIsWLXXq1MlLMr6fXxXt2rWTYAUAwHkQqq4Ahw8f1qlTJxX+wFQ56jY0OrbzwE/a+M50HT58mFAFAMB5EKquII66DVWjfvPybgMAgKsSN6oDAAAYQKgCAAAwgFAFAABgAKEKAADAAEIVAACAAYQqAAAAAwhVAAAABhCqAAAADCBUAQAAGECoAgAAMIBQBQAAYAChCgAAwABCFQAAgAGEKgAAAAMIVQAAAAYQqgAAAAwgVAEAABhAqAIAADCAUAUAAGBAuYaq+Ph4XX/99fL391edOnXUr18/paenu9Xccsst8vDwcFseeught5qMjAz17t1bVapUUZ06dTRu3DidOXPGrWbNmjXq0KGDfHx81LRpUyUmJhbpJyEhQQ0bNpSvr6/Cw8O1adMmt/2nT59WTEyMatasqWrVqmnAgAHKysoyMxkAAKBCK9dQtXbtWsXExOibb75RUlKS8vPz1aNHD504ccKtbuTIkTpw4IC9zJo1y95XUFCg3r17Ky8vTxs2bNCCBQuUmJioKVOm2DX79u1T79691a1bN6WlpWnMmDEaMWKEVq1aZde8//77iouL09SpU7Vlyxa1a9dOUVFROnjwoF0zduxYffrpp1qyZInWrl2r/fv3q3///pdwhgAAQEXhYVmWVd5NFDp06JDq1KmjtWvXqkuXLpL+uFLVvn17zZkzp9hjPvvsM/Xp00f79+9XUFCQJGnevHl68skndejQIXl7e+vJJ5/U8uXLtX37dvu4wYMHKzs7WytXrpQkhYeH6/rrr9drr70mSXK5XAoNDdXo0aM1YcIE5eTkqHbt2lq0aJHuuusuSdKuXbvUsmVLpaSk6IYbbrjg63M6nQoICFBOTo4cDkep5+nPtmzZorCwMN02ab5q1G9ubFxJOpKRrqS/D1Nqaqo6dOhgdGwAACqCi33/vqzuqcrJyZEk1ahRw237woULVatWLV133XWaOHGiTp48ae9LSUlRmzZt7EAlSVFRUXI6ndqxY4ddExkZ6TZmVFSUUlJSJEl5eXlKTU11q/H09FRkZKRdk5qaqvz8fLeaFi1aqH79+nbNn+Xm5srpdLotAADgylSpvBso5HK5NGbMGN1000267rrr7O333HOPGjRooJCQEG3dulVPPvmk0tPT9eGHH0qSMjMz3QKVJHs9MzPzvDVOp1OnTp3S0aNHVVBQUGzNrl277DG8vb0VGBhYpKbwPH8WHx+v6dOnl3AmAABARXTZhKqYmBht375dX3/9tdv2UaNG2T+3adNGdevW1a233qq9e/eqSZMmZd1miUycOFFxcXH2utPpVGhoaDl2BAAALpXL4uO/2NhYLVu2TF9++aXq1at33trw8HBJ0p49eyRJwcHBRb6BV7geHBx83hqHwyE/Pz/VqlVLXl5exdacPUZeXp6ys7PPWfNnPj4+cjgcbgsAALgylWuosixLsbGx+uijj7R69Wo1atTogsekpaVJkurWrStJioiI0LZt29y+pZeUlCSHw6FWrVrZNcnJyW7jJCUlKSIiQpLk7e2tsLAwtxqXy6Xk5GS7JiwsTJUrV3arSU9PV0ZGhl0DAACuXuX68V9MTIwWLVqkjz/+WP7+/va9SQEBAfLz89PevXu1aNEi3X777apZs6a2bt2qsWPHqkuXLmrbtq0kqUePHmrVqpWGDBmiWbNmKTMzU5MnT1ZMTIx8fHwkSQ899JBee+01jR8/Xg888IBWr16tDz74QMuXL7d7iYuLU3R0tDp27KhOnTppzpw5OnHihIYNG2b3NHz4cMXFxalGjRpyOBwaPXq0IiIiLuqbfwAA4MpWrqHqjTfekPTHYxPONn/+fA0dOlTe3t764osv7IATGhqqAQMGaPLkyXatl5eXli1bpocfflgRERGqWrWqoqOjNWPGDLumUaNGWr58ucaOHau5c+eqXr16evvttxUVFWXX
DBo0SIcOHdKUKVOUmZmp9u3ba+XKlW43r7/88svy9PTUgAEDlJubq6ioKL3++uuXaHYAAEBFclk9p+pKx3OqAACoeCrkc6oAAAAqKkIVAACAAYQqAAAAAwhVAAAABhCqAAAADCBUAQAAGECoAgAAMIBQBQAAYAChCgAAwABCFQAAgAGEKgAAAAMIVQAAAAYQqgAAAAwgVAEAABhAqAIAADCAUAUAAGAAoQoAAMAAQhUAAIABhCoAAAADCFUAAAAGEKoAAAAMIFQBAAAYQKgCAAAwgFAFAABgAKEKAADAAEIVAACAAYQqAAAAAwhVAAAABhCqAAAADCBUAQAAGECoAgAAMIBQBQAAYAChCgAAwABCFQAAgAGEKgAAAAMIVQAAAAYQqgAAAAwgVAEAABhAqAIAADCAUAUAAGAAoQoAAMAAQhUAAIABhCoAAAADCFUAAAAGEKoAAAAMIFQBAAAYQKgCAAAwgFAFAABgAKEKAADAAEIVAACAAYQqAAAAAwhVAAAABhCqAAAADCBUAQAAGFCuoSo+Pl7XX3+9/P39VadOHfXr10/p6eluNadPn1ZMTIxq1qypatWqacCAAcrKynKrycjIUO/evVWlShXVqVNH48aN05kzZ9xq1qxZow4dOsjHx0dNmzZVYmJikX4SEhLUsGFD+fr6Kjw8XJs2bSpxLwAA4OpUrqFq7dq1iomJ0TfffKOkpCTl5+erR48eOnHihF0zduxYffrpp1qyZInWrl2r/fv3q3///vb+goIC9e7dW3l5edqwYYMWLFigxMRETZkyxa7Zt2+fevfurW7duiktLU1jxozRiBEjtGrVKrvm/fffV1xcnKZOnaotW7aoXbt2ioqK0sGDBy+6FwAAcPXysCzLKu8mCh06dEh16tTR2rVr1aVLF+Xk5Kh27dpatGiR7rrrLknSrl271LJlS6WkpOiGG27QZ599pj59+mj//v0KCgqSJM2bN09PPvmkDh06JG9vbz355JNavny5tm/fbp9r8ODBys7O1sqVKyVJ4eHhuv766/Xaa69Jklwul0JDQzV69GhNmDDhonq5EKfTqYCAAOXk5MjhcBibty1btigsLEy3TZqvGvWbGxtXko5kpCvp78OUmpqqDh06GB0bAICK4GLfvy+re6pycnIkSTVq1JAkpaamKj8/X5GRkXZNixYtVL9+faWkpEiSUlJS1KZNGztQSVJUVJScTqd27Nhh15w9RmFN4Rh5eXlKTU11q/H09FRkZKRdczG9/Flubq6cTqfbAgAArkyXTahyuVwaM2aMbrrpJl133XWSpMzMTHl7eyswMNCtNigoSJmZmXbN2YGqcH/hvvPVOJ1OnTp1SocPH1ZBQUGxNWePcaFe/iw+Pl4BAQH2EhoaepGzAQAAKprLJlTFxMRo+/btWrx4cXm3YszEiROVk5NjL7/88kt5twQAAC6RSuXdgCTFxsZq2bJlWrdunerVq2dvDw4OVl5enrKzs92uEGVlZSk4ONiu+fO39Aq/kXd2zZ+/pZeVlSWHwyE/Pz95eXnJy8ur2Jqzx7hQL3/m4+MjHx+fEswEAACoqMr1SpVlWYqNjdVHH32k1atXq1GjRm77w8LCVLlyZSUnJ9vb0tPTlZGRoYiICElSRESEtm3b5vYtvaSkJDkcDrVq1cquOXuMwprCMby9vRUWFuZW43K5lJycbNdcTC8AAODqVa5XqmJiYrRo0SJ9/PHH8vf3t+9NCggIkJ+fnwICAjR8+HDFxcWpRo0acjgcGj16tCIiIuxv2/Xo0UOtWrXSkCFDNGvWLGVmZmry5MmKiYmxrxI99NBDeu211zR+/Hg98MADWr16tT744AMtX77c7iUuLk7R0dHq2LGjOnXqpDlz5ujEiRMaNmyY3dOFegEAAFevcg1Vb7zxhiTplltucds+f/58DR06VJL08ssvy9PTUwMGDFBubq6ioqL0+uuv27VeXl5atmyZHn74YUVERKhq1aqKjo7WjBkz7JpG
jRpp+fLlGjt2rObOnat69erp7bffVlRUlF0zaNAgHTp0SFOmTFFmZqbat2+vlStXut28fqFeAADA1euyek7VlY7nVAEAUPFUyOdUAQAAVFSEKgAAAAMIVQAAAAYQqgAAAAwgVAEAABhAqAIAADCAUAUAAGAAoQoAAMAAQhUAAIABhCoAAAADCFUAAAAGEKoAAAAMIFQBAAAYQKgCAAAwgFAFAABgAKEKAADAAEIVAACAAYQqAAAAAwhVAAAABhCqAAAADCBUAQAAGECoAgAAMIBQBQAAYAChCgAAwABCFQAAgAGEKgAAAAMIVQAAAAYQqgAAAAwgVAEAABhAqAIAADCAUAUAAGAAoQoAAMAAQhUAAIABhCoAAAADCFUAAAAGEKoAAAAMIFQBAAAYQKgCAAAwgFAFAABgAKEKAADAAEIVAACAAYQqAAAAAwhVAAAABhCqAAAADCBUAQAAGECoAgAAMIBQBQAAYAChCgAAwABCFQAAgAGEKgAAAAMIVQAAAAaUKlQ1btxYv//+e5Ht2dnZaty48V9uCgAAoKIpVaj66aefVFBQUGR7bm6ufvvtt7/cFAAAQEVTqSTFn3zyif3zqlWrFBAQYK8XFBQoOTlZDRs2NNYcAABARVGiK1X9+vVTv3795OHhoejoaHu9X79+Gjx4sJKSkvTSSy9d9Hjr1q1T3759FRISIg8PDy1dutRt/9ChQ+Xh4eG29OzZ063myJEjuvfee+VwOBQYGKjhw4fr+PHjbjVbt25V586d5evrq9DQUM2aNatIL0uWLFGLFi3k6+urNm3aaMWKFW77LcvSlClTVLduXfn5+SkyMlK7d+++6NcKAACubCUKVS6XSy6XS/Xr19fBgwftdZfLpdzcXKWnp6tPnz4XPd6JEyfUrl07JSQknLOmZ8+eOnDggL38+9//dtt/7733aseOHUpKStKyZcu0bt06jRo1yt7vdDrVo0cPNWjQQKmpqXrhhRc0bdo0vfnmm3bNhg0bdPfdd2v48OH67rvv7KC4fft2u2bWrFl65ZVXNG/ePG3cuFFVq1ZVVFSUTp8+fdGvFwAAXLlK9PFfoX379hk5ea9evdSrV6/z1vj4+Cg4OLjYfTt37tTKlSv17bffqmPHjpKkV199VbfffrtefPFFhYSEaOHChcrLy9M777wjb29vtW7dWmlpaZo9e7YdvubOnauePXtq3LhxkqSZM2cqKSlJr732mubNmyfLsjRnzhxNnjxZd9xxhyTp3XffVVBQkJYuXarBgwcbmQ8AAFBxlSpUSVJycrKSk5PtK1Zne+edd/5yY4XWrFmjOnXqqHr16urevbueeeYZ1axZU5KUkpKiwMBAO1BJUmRkpDw9PbVx40bdeeedSklJUZcuXeTt7W3XREVF6fnnn9fRo0dVvXp1paSkKC4uzu28UVFR9seR+/btU2ZmpiIjI+39AQEBCg8PV0pKyjlDVW5urnJzc+11p9P5l+cDAABcnkr17b/p06erR48eSk5O1uHDh3X06FG3xZSePXvq3XffVXJysp5//nmtXbtWvXr1sr95mJmZqTp16rgdU6lSJdWoUUOZmZl2TVBQkFtN4fqFas7ef/ZxxdUUJz4+XgEBAfYSGhpaotcPAAAqjlJdqZo3b54SExM1ZMgQ0/24OfsKUJs2bdS2bVs1adJEa9as0a233npJz23CxIkT3a6AOZ1OghUAAFeoUl2pysvL04033mi6lwtq3LixatWqpT179kiSgoODdfDgQbeaM2fO6MiRI/Z9WMHBwcrKynKrKVy/UM3Z+88+rria4vj4+MjhcLgtAADgylSqUDVixAgtWrTIdC8X9Ouvv+r3339X3bp1JUkRERHKzs5WamqqXbN69Wq5XC6Fh4fbNevWrVN+fr5dk5SUpObNm6t69ep2TXJystu5kpKSFBERIUlq1KiRgoOD3WqcTqc2btxo1wAAgKtbqT7+O336tN5880198cUXatu2rSpXruy2f/bs2Rc1
zvHjx+2rTtIfN4SnpaWpRo0aqlGjhqZPn64BAwYoODhYe/fu1fjx49W0aVNFRUVJklq2bKmePXtq5MiRmjdvnvLz8xUbG6vBgwcrJCREknTPPfdo+vTpGj58uJ588klt375dc+fO1csvv2yf97HHHlPXrl310ksvqXfv3lq8eLE2b95sP3bBw8NDY8aM0TPPPKNrr71WjRo10tNPP62QkBD169evNFMIAACuMKUKVVu3blX79u0lye1ZTtIfAeRibd68Wd26dbPXC+8/io6O1htvvKGtW7dqwYIFys7OVkhIiHr06KGZM2fKx8fHPmbhwoWKjY3VrbfeKk9PTw0YMECvvPKKvT8gIECff/65YmJiFBYWplq1amnKlCluz7K68cYbtWjRIk2ePFlPPfWUrr32Wi1dulTXXXedXTN+/HidOHFCo0aNUnZ2tm6++WatXLlSvr6+F/16AQDAlcvDsiyrvJu4WjidTgUEBCgnJ8fo/VVbtmxRWFiYbps0XzXqNzc2riQdyUhX0t+HKTU1VR06dDA6NgAAFcHFvn+X6p4qAAAAuCvVx3/dunU778d8q1evLnVDAAAAFVGpQlXh/VSF8vPzlZaWpu3btys6OtpEXwAAABVKqULV2d+cO9u0adN0/Pjxv9QQAABARWT0nqr77rvP6O/9AwAAqCiMhqqUlBQeMQAAAK5Kpfr4r3///m7rlmXpwIED2rx5s55++mkjjQEAAFQkpQpVAQEBbuuenp5q3ry5ZsyYoR49ehhpDAAAoCIpVaiaP3++6T4AAAAqtFKFqkKpqanauXOnJKl169b629/+ZqQpAACAiqZUoergwYMaPHiw1qxZo8DAQElSdna2unXrpsWLF6t27domewQAALjslerbf6NHj9axY8e0Y8cOHTlyREeOHNH27dvldDr16KOPmu4RAADgsleqK1UrV67UF198oZYtW9rbWrVqpYSEBG5UBwAAV6VSXalyuVyqXLlyke2VK1eWy+X6y00BAABUNKUKVd27d9djjz2m/fv329t+++03jR07Vrfeequx5gAAACqKUoWq1157TU6nUw0bNlSTJk3UpEkTNWrUSE6nU6+++qrpHgEAAC57pbqnKjQ0VFu2bNEXX3yhXbt2SZJatmypyMhIo80BAABUFCW6UrV69Wq1atVKTqdTHh4euu222zR69GiNHj1a119/vVq3bq2vvvrqUvUKAABw2SpRqJozZ45Gjhwph8NRZF9AQIAefPBBzZ4921hzAAAAFUWJQtX333+vnj17nnN/jx49lJqa+pebAgAAqGhKFKqysrKKfZRCoUqVKunQoUN/uSkAAICKpkSh6pprrtH27dvPuX/r1q2qW7fuX24KAACgoilRqLr99tv19NNP6/Tp00X2nTp1SlOnTlWfPn2MNQcAAFBRlOiRCpMnT9aHH36oZs2aKTY2Vs2bN5ck7dq1SwkJCSooKNCkSZMuSaMAAACXsxKFqqCgIG3YsEEPP/ywJk6cKMuyJEkeHh6KiopSQkKCgoKCLkmjAAAAl7MSP/yzQYMGWrFihY4ePao9e/bIsixde+21ql69+qXoDwAAoEIo1RPVJal69eq6/vrrTfYCAABQYZXqd/8BAADAHaEKAADAAEIVAACAAYQqAAAAAwhVAAAABhCqAAAADCBUAQAAGECoAgAAMIBQBQAAYAChCgAAwABCFQAAgAGEKgAAAAMIVQAAAAYQqgAAAAwgVAEAABhAqAIAADCAUAUAAGAAoQoAAMAAQhUAAIABhCoAAAADCFUAAAAGEKoAAAAMIFQBAAAYQKgCAAAwgFAFAABgAKEKAADAAEIVAACAAeUaqtatW6e+ffsqJCREHh4eWrp0qdt+y7I0ZcoU1a1bV35+foqMjNTu3bvdao4cOaJ7771XDodDgYGBGj58uI4fP+5Ws3XrVnXu3Fm+vr4KDQ3VrFmzivSyZMkStWjRQr6+vmrTpo1WrFhR4l4AAMDVq1xD1YkTJ9Su
XTslJCQUu3/WrFl65ZVXNG/ePG3cuFFVq1ZVVFSUTp8+bdfce++92rFjh5KSkrRs2TKtW7dOo0aNsvc7nU716NFDDRo0UGpqql544QVNmzZNb775pl2zYcMG3X333Ro+fLi+++479evXT/369dP27dtL1AsAALh6eViWZZV3E5Lk4eGhjz76SP369ZP0x5WhkJAQPf7443riiSckSTk5OQoKClJiYqIGDx6snTt3qlWrVvr222/VsWNHSdLKlSt1++2369dff1VISIjeeOMNTZo0SZmZmfL29pYkTZgwQUuXLtWuXbskSYMGDdKJEye0bNkyu58bbrhB7du317x58y6ql4vhdDoVEBCgnJwcORwOI/MmSVu2bFFYWJhumzRfNeo3NzauJB3JSFfS34cpNTVVHTp0MDo2AAAVwcW+f1+291Tt27dPmZmZioyMtLcFBAQoPDxcKSkpkqSUlBQFBgbagUqSIiMj5enpqY0bN9o1Xbp0sQOVJEVFRSk9PV1Hjx61a84+T2FN4Xkuppfi5Obmyul0ui0AAODKdNmGqszMTElSUFCQ2/agoCB7X2ZmpurUqeO2v1KlSqpRo4ZbTXFjnH2Oc9Wcvf9CvRQnPj5eAQEB9hIaGnqBVw0AACqqyzZUXQkmTpyonJwce/nll1/KuyUAAHCJXLahKjg4WJKUlZXltj0rK8veFxwcrIMHD7rtP3PmjI4cOeJWU9wYZ5/jXDVn779QL8Xx8fGRw+FwWwAAwJXpsg1VjRo1UnBwsJKTk+1tTqdTGzduVEREhCQpIiJC2dnZSk1NtWtWr14tl8ul8PBwu2bdunXKz8+3a5KSktS8eXNVr17drjn7PIU1hee5mF4AAMDVrVxD1fHjx5WWlqa0tDRJf9wQnpaWpoyMDHl4eGjMmDF65pln9Mknn2jbtm26//77FRISYn9DsGXLlurZs6dGjhypTZs2af369YqNjdXgwYMVEhIiSbrnnnvk7e2t4cOHa8eOHXr//fc1d+5cxcXF2X089thjWrlypV566SXt2rVL06ZN0+bNmxUbGytJF9ULAAC4ulUqz5Nv3rxZ3bp1s9cLg050dLQSExM1fvx4nThxQqNGjVJ2drZuvvlmrVy5Ur6+vvYxCxcuVGxsrG699VZ5enpqwIABeuWVV+z9AQEB+vzzzxUTE6OwsDDVqlVLU6ZMcXuW1Y033qhFixZp8uTJeuqpp3Tttddq6dKluu666+yai+kFAABcvS6b51RdDXhOFQAAFU+Ff04VAABARUKoAgAAMIBQBQAAYAChCgAAwABCFQAAgAGEKgAAAAMIVQAAAAYQqgAAAAwgVAEAABhAqAIAADCAUAUAAGAAoQoAAMAAQhUAAIABhCoAAAADCFUAAAAGEKoAAAAMIFQBAAAYQKgCAAAwgFAFAABgAKEKAADAAEIVAACAAYQqAAAAAwhVAAAABhCqAAAADCBUAQAAGECoAgAAMIBQBQAAYAChCgAAwABCFQAAgAGEKgAAAAMIVQAAAAYQqgAAAAwgVAEAABhAqAIAADCAUAUAAGAAoQoAAMAAQhUAAIABhCoAAAADCFUAAAAGEKoAAAAMIFQBAAAYQKgCAAAwgFAFAABgAKEKAADAAEIVAACAAYQqAAAAAwhVAAAABhCqAAAADCBUAQAAGECoAgAAMIBQBQAAYAChCgAAwABCFQAAgAGEKgAAAAMu61A1bdo0eXh4uC0tWrSw958+fVoxMTGqWbOmqlWrpgEDBigrK8ttjIyMDPXu3VtVqlRRnTp1NG7cOJ05c8atZs2aNerQoYN8fHzUtGlTJSYmFuklISFBDRs2lK+vr8LDw7Vp06ZL8poBAEDFdFmHKklq3bq1Dhw4YC9ff/21vW/s2LH69NNPtWTJEq1du1b79+9X//797f0FBQXq3bu38vLytGHDBi1YsECJiYmaMmWKXbNv3z717t1b3bp1U1pamsaMGaMRI0Zo1apVds3777+vuLg4TZ06VVu2
bFG7du0UFRWlgwcPls0kAACAy95lH6oqVaqk4OBge6lVq5YkKScnR//85z81e/Zsde/eXWFhYZo/f742bNigb775RpL0+eef64cfftB7772n9u3bq1evXpo5c6YSEhKUl5cnSZo3b54aNWqkl156SS1btlRsbKzuuusuvfzyy3YPs2fP1siRIzVs2DC1atVK8+bNU5UqVfTOO++U/YQAAIDL0mUfqnbv3q2QkBA1btxY9957rzIyMiRJqampys/PV2RkpF3bokUL1a9fXykpKZKklJQUtWnTRkFBQXZNVFSUnE6nduzYYdecPUZhTeEYeXl5Sk1Ndavx9PRUZGSkXQMAAFCpvBs4n/DwcCUmJqp58+Y6cOCApk+frs6dO2v79u3KzMyUt7e3AgMD3Y4JCgpSZmamJCkzM9MtUBXuL9x3vhqn06lTp07p6NGjKigoKLZm165d5+0/NzdXubm59rrT6bz4Fw8AACqUyzpU9erVy/65bdu2Cg8PV4MGDfTBBx/Iz8+vHDu7OPHx8Zo+fXp5twEAAMrAZf/x39kCAwPVrFkz7dmzR8HBwcrLy1N2drZbTVZWloKDgyVJwcHBRb4NWLh+oRqHwyE/Pz/VqlVLXl5exdYUjnEuEydOVE5Ojr388ssvJX7NAACgYqhQoer48ePau3ev6tatq7CwMFWuXFnJycn2/vT0dGVkZCgiIkKSFBERoW3btrl9Sy8pKUkOh0OtWrWya84eo7CmcAxvb2+FhYW51bhcLiUnJ9s15+Lj4yOHw+G2AACAK9NlHaqeeOIJrV27Vj/99JM2bNigO++8U15eXrr77rsVEBCg4cOHKy4uTl9++aVSU1M1bNgwRURE6IYbbpAk9ejRQ61atdKQIUP0/fffa9WqVZo8ebJiYmLk4+MjSXrooYf0448/avz48dq1a5def/11ffDBBxo7dqzdR1xcnN566y0tWLBAO3fu1MMPP6wTJ05o2LBh5TIvAADg8nNZ31P166+/6u6779bvv/+u2rVr6+abb9Y333yj2rVrS5JefvlleXp6asCAAcrNzVVUVJRef/11+3gvLy8tW7ZMDz/8sCIiIlS1alVFR0drxowZdk2jRo20fPlyjR07VnPnzlW9evX09ttvKyoqyq4ZNGiQDh06pClTpigzM1Pt27fXypUri9y8DgAArl4elmVZ5d3E1cLpdCogIEA5OTlGPwrcsmWLwsLCdNuk+apRv7mxcSXpSEa6kv4+TKmpqerQoYPRsQEAqAgu9v37sv74DwAAoKIgVAEAABhAqAIAADCAUAUAAGAAoQoAAMAAQhUAAIABhCoAAAADCFUAAAAGEKoAAAAMIFQBAAAYQKgCAAAwgFAFAABgAKEKAADAAEIVAACAAYQqAAAAAwhVAAAABlQq7wYAAMDVJSMjQ4cPHzY+bq1atVS/fn3j414sQhUAACgzGRkZatGipU6dOml8bD+/Ktq1a2e5BStCFQAAKDOHDx/WqVMnFf7AVDnqNjQ2rvPAT9r4znQdPnyYUAUAAK4ejroNVaN+8/JuwyhuVAcAADCAUAUAAGAAoQoAAMAAQhUAAIABhCoAAAADCFUAAAAGEKoAAAAMIFQBAAAYQKgCAAAwgFAFAABgAKEKAADAAEIVAACAAYQqAAAAAwhVAAAABhCqAAAADCBUAQAAGECoAgAAMIBQBQAAYAChCgAAwABCFQAAgAGEKgAAAAMIVQAAAAYQqgAAAAwgVAEAABhAqAIAADCAUAUAAGAAoQoAAMAAQhUAAIABhCoAAAADCFUAAAAGEKoAAAAMIFQBAAAYQKgCAAAwgFAFAABgAKGqhBISEtSwYUP5+voqPDxcmzZtKu+WAADAZYBQVQLvv/++4uLiNHXqVG3ZskXt2rVTVFSUDh48WN6tAQCAckaoKoHZs2dr5MiRGjZsmFq1aqV58+apSpUqeuedd8q7NQAAUM4IVRcpLy9PqampioyMtLd5enoqMjJSKSkp5dgZAAC4
HFQq7wYqisOHD6ugoEBBQUFu24OCgrRr165ij8nNzVVubq69npOTI0lyOp1Gezt+/Lgk6cjP6TqTe8ro2M7MDElSamqqfR6TPD095XK5Ksy4l3Jsei6bsem54o9Nz2Uz9qUaNz09XZL596zC96vjx48bf58tHM+yrPPWEaouofj4eE2fPr3I9tDQ0EtyvtT3nrsk40rSqFGjLtnYAICrz6V6z+rateslGVeSjh07poCAgHPuJ1RdpFq1asnLy0tZWVlu27OyshQcHFzsMRMnTlRcXJy97nK5dOTIEdWsWVMeHh7GenM6nQoNDdUvv/wih8NhbFwUxVyXDea5bDDPZYN5LhuXcp4ty9KxY8cUEhJy3jpC1UXy9vZWWFiYkpOT1a9fP0l/hKTk5GTFxsYWe4yPj498fHzctgUGBl6yHh0OB//BlhHmumwwz2WDeS4bzHPZuFTzfL4rVIUIVSUQFxen6OhodezYUZ06ddKcOXN04sQJDRs2rLxbAwAA5YxQVQKDBg3SoUOHNGXKFGVmZqp9+/ZauXJlkZvXAQDA1YdQVUKxsbHn/LivvPj4+Gjq1KlFPmqEecx12WCeywbzXDaY57JxOcyzh3Wh7wcCAADggnj4JwAAgAGEKgAAAAMIVQAAAAYQqgAAAAwgVFUQCQkJatiwoXx9fRUeHq5Nmzadt37JkiVq0aKFfH191aZNG61YsaKMOq34SjLXb731ljp37qzq1aurevXqioyMvOCfDf5Q0r/ThRYvXiwPDw/7Ibw4v5LOc3Z2tmJiYlS3bl35+PioWbNm/PtxEUo6z3PmzFHz5s3l5+en0NBQjR07VqdPny6jbiumdevWqW/fvgoJCZGHh4eWLl16wWPWrFmjDh06yMfHR02bNlViYuKlbdLCZW/x4sWWt7e39c4771g7duywRo4caQUGBlpZWVnF1q9fv97y8vKyZs2aZf3www/W5MmTrcqVK1vbtm0r484rnpLO9T333GMlJCRY3333nbVz505r6NChVkBAgPXrr7+WcecVS0nnudC+ffusa665xurcubN1xx13lE2zFVhJ5zk3N9fq2LGjdfvtt1tff/21tW/fPmvNmjVWWlpaGXdesZR0nhcuXGj5+PhYCxcutPbt22etWrXKqlu3rjV27Ngy7rxiWbFihTVp0iTrww8/tCRZH3300Xnrf/zxR6tKlSpWXFyc9cMPP1ivvvqq5eXlZa1cufKS9UioqgA6depkxcTE2OsFBQVWSEiIFR8fX2z9wIEDrd69e7ttCw8Ptx588MFL2ueVoKRz/Wdnzpyx/P39rQULFlyqFq8IpZnnM2fOWDfeeKP19ttvW9HR0YSqi1DSeX7jjTesxo0bW3l5eWXV4hWhpPMcExNjde/e3W1bXFycddNNN13SPq8kFxOqxo8fb7Vu3dpt26BBg6yoqKhL1hcf/13m8vLylJqaqsjISHubp6enIiMjlZKSUuwxKSkpbvWSFBUVdc56/KE0c/1nJ0+eVH5+vmrUqHGp2qzwSjvPM2bMUJ06dTR8+PCyaLPCK808f/LJJ4qIiFBMTIyCgoJ03XXX6dlnn1VBQUFZtV3hlGaeb7zxRqWmptofEf74449asWKFbr/99jLp+WpRHu+FPFH9Mnf48GEVFBQU+VU4QUFB2rVrV7HHZGZmFlufmZl5yfq8EpRmrv/sySefVEhISJH/kPH/K808f/311/rnP/+ptLS0MujwylCaef7xxx+1evVq3XvvvVqxYoX27NmjRx55RPn5+Zo6dWpZtF3hlGae77nnHh0+fFg333yzLMvSmTNn9NBDD+mpp54qi5avGud6L3Q6nTp16pT8/PyMn5MrVYAhzz33nBYvXqyPPvpIvr6+5d3OFePYsWMaMmSI3nrrLdWqVau827miuVwu1alTR2+++abCwsI0aNAgTZo0SfPmzSvv1q4oa9as0bPPPqvXX39dW7Zs0Ycffqjly5dr5syZ5d0a/iKuVF3matWqJS8v
L2VlZbltz8rKUnBwcLHHBAcHl6gefyjNXBd68cUX9dxzz+mLL75Q27ZtL2WbFV5J53nv3r366aef1LdvX3uby+WSJFWqVEnp6elq0qTJpW26AirN3+e6deuqcuXK8vLysre1bNlSmZmZysvLk7e39yXtuSIqzTw//fTTGjJkiEaMGCFJatOmjU6cOKFRo0Zp0qRJ8vTkeocJ53ovdDgcl+QqlcSVqsuet7e3wsLClJycbG9zuVxKTk5WREREscdERES41UtSUlLSOevxh9LMtSTNmjVLM2fO1MqVK9WxY8eyaLVCK+k8t2jRQtu2bVNaWpq9/L//9//UrVs3paWlKTQ0tCzbrzBK8/f5pptu0p49e+zQKkn/+9//VLduXQLVOZRmnk+ePFkkOBUGWYtfx2tMubwXXrJb4GHM4sWLLR8fHysxMdH64YcfrFGjRlmBgYFWZmamZVmWNWTIEGvChAl2/fr1661KlSpZL774orVz505r6tSpPFLhIpV0rp977jnL29vb+s9//mMdOHDAXo4dO1ZeL6FCKOk8/xnf/rs4JZ3njIwMy9/f34qNjbXS09OtZcuWWXXq1LGeeeaZ8noJFUJJ53nq1KmWv7+/9e9//9v68ccfrc8//9xq0qSJNXDgwPJ6CRXCsWPHrO+++8767rvvLEnW7Nmzre+++876+eefLcuyrAkTJlhDhgyx6wsfqTBu3Dhr586dVkJCAo9UwB9effVVq379+pa3t7fVqVMn65tvvrH3de3a1YqOjnar/+CDD6xmzZpZ3t7eVuvWra3ly5eXcccVV0nmukGDBpakIsvUqVPLvvEKpqR/p89GqLp4JZ3nDRs2WOHh4ZaPj4/VuHFj6+9//7t15syZMu664inJPOfn51vTpk2zmjRpYvn6+lqhoaHWI488Yh09erTsG69Avvzyy2L/vS2c2+joaKtr165Fjmnfvr3l7e1tNW7c2Jo/f/4l7dHDsrjWCAAA8FdxTxUAAIABhCoAAAADCFUAAAAGEKoAAAAMIFQBAAAYQKgCAAAwgFAFAABgAKEKAP5k6NCh6tevX3m3UazLuTfgascvVAaAP5k7d67R38E2dOhQZWdna+nSpcbGBHD5IVQBuKrk5eVd8JcDBwQElFE3AK4kfPwH4Ip2yy23KDY2VmPGjFGtWrUUFRWl7du3q1evXqpWrZqCgoI0ZMgQHT582D7mzx+xuVwuxcfHq1GjRvLz81O7du30n//8x+08O3bsUJ8+feRwOOTv76/OnTtr7969mjZtmhYsWKCPP/5YHh4e8vDw0Jo1ayRJv/zyiwYOHKjAwEDVqFFDd9xxh3766Sd7zIKCAsXFxSkwMFA1a9bU+PHjjV5BA2AWoQrAFW/BggXy9vbW+vXr9dxzz6l79+7629/+ps2bN2vlypXKysrSwIEDz3l8fHy83n33Xc2bN087duzQ2LFjdd9992nt2rWSpN9++01dunSRj4+PVq9erdTUVD3wwAM6c+aMnnjiCQ0cOFA9e/bUgQMHdODAAd14443Kz89XVFSU/P399dVXX2n9+vWqVq2aevbsqby8PEnSSy+9pMTERL3zzjv6+uuvdeTIEX300UdlMmcASo5fqAzginbLLbfI6XRqy5YtkqRnnnlGX331lVatWmXX/PrrrwoNDVV6erqaNWvmdg9Ubm6uatSooS+++EIRERH2MSNGjNDJkye1aNEiPfXUU1q8eLHS09NVuXLlIj0Ud0/Ve++9p2eeeUY7d+6Uh4eHpD8+mgwMDNTSpUvVo0cPhYSEaOzYsRo3bpwk6cyZM2rUqJHCwsK4Pwu4DHFPFYArXlhYmP3z999/ry+//FLVqlUrUrd37141a9bMbduePXt08uRJ3XbbbW7b8/Ly9Le//U2SlJaWps6dOxcbqM7l+++/1549e+Tv7++2/fTp09q7d69ycnJ04MABhYeH2/sqVaqkjh078hEgcJkiVAG44lWtWtX++fjx4+rbt6+ef/75InV1
69Ytsu348eOSpOXLl+uaa65x2+fj4yNJ8vPzK3FPx48fV1hYmBYuXFhkX+3atUs8HoDyR6gCcFXp0KGD/vvf/6phw4aqVOnC/wS2atVKPj4+ysjIUNeuXYutadu2rRYsWKD8/Pxir1Z5e3uroKCgSB/vv/++6tSpI4fDUey4devW1caNG9WlSxdJf3z8l5qaqg4dOlywbwBljxvVAVxVYmJidOTIEd1999369ttvtXfvXq1atUrDhg0rEnwkyd/fX0888YTGjh2rBQsWaO/evdqyZYteffVVLViwQJIUGxsrp9OpwYMHa/Pmzdq9e7f+9a9/KT09XZLUsGFDbd26Venp6Tp8+LDy8/N17733qlatWrrjjjv01Vdfad++fVqzZo0effRR/frrr5Kkxx57TM8995yWLl2qXbt26ZFHHlF2dnaZzRWAkiFUAbiqhISEaP369SooKFCPHj3Upk0bjRkzRoGBgfL0LP6fxJkzZ+rpp59WfHy8WrZsqZ49e2r58uVq1KiRJKlmzZpavXq1jh8/rq5duyosLExvvfWWfdVq5MiRat68uTp27KjatWtr/fr1qlKlitatW6f69eurf//+atmypYYPH67Tp0/bV64ef/xxDRkyRNHR0YqIiJC/v7/uvPPOspkoACXGt/8A4E/uvvtueXl56b333ivvVgBUIFypAoD/c+bMGf3www9KSUlR69aty7sdABUMoQoA/s/27dvVsWNHtW7dWg899FB5twOgguHjPwAAAAO4UgUAAGAAoQoAAMAAQhUAAIABhCoAAAADCFUAAAAGEKoAAAAMIFQBAAAYQKgCAAAwgFAFAABgwP8H5wDB3oxnxc4AAAAASUVORK5CYII=",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "sns.histplot(df[df.language=='fr'].rejected).set(title='French comments')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "afc470b0",
   "metadata": {},
   "source": [
    "### Distribution over the year"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "d7591d09",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "def extract_date(string):\n",
    "    year = int(string[:4])\n",
    "    month = int(string[5:7])\n",
    "    day = int(string[8:10])\n",
    "    return date(year, month, day)\n",
    "\n",
    "sns.histplot(df.createdAt.map(extract_date));"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "990bc0e7",
   "metadata": {},
   "source": [
    "### Newspaper"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "9e780978",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/var/folders/bn/hrm9f3gs76z5zb1bxxc4g_s00000gn/T/ipykernel_72170/805556954.py:2: UserWarning: FixedFormatter should only be used together with FixedLocator\n",
      "  g.set_xticklabels(g.get_xticklabels(), rotation=90);\n"
     ]
    },
    {
     "data": {
      "image/png": "",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "g = sns.histplot(df.originTenantId)\n",
    "g.set_xticklabels(g.get_xticklabels(), rotation=90);"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3db1edc8",
   "metadata": {},
   "source": [
    "### Outline of project"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "fdf82786",
   "metadata": {},
   "source": [
    "1. German only\n",
    "- Multinomial Naive Bayes (MNB) (with described preprocessing)\n",
    "- mBert (with described preprocessing)\n",
    "\n",
    "2. French only (?)\n",
    "- Multinomial Naive Bayes (MNB) (with described preprocessing)\n",
    "- mBert (with described preprocessing)\n",
    "\n",
    "3. Multilingual\n",
    "- Multinomial Naive Bayes (MNB) (with described preprocessing)\n",
    "- mBert (with described preprocessing)\n",
    "\n",
    "Should all newpapers be combined?\n",
    "\n",
    "Otherwise: do the following per case:\n",
    "- Training of one classifier based on the whole corpus.\n",
    "- Training of one classifier per newspaper.\n",
    "- Training of one classifier based on the whole corpus and fine-tuning per newspaper.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "126164ff",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "pp_env",
   "language": "python",
   "name": "pp_env"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}