Skip to content
Snippets Groups Projects
01_introduction.ipynb 38.1 KiB
Newer Older
  • Learn to ignore specific revisions
  •    "outputs": [],
    
        "# Recall: ?LogisticRegression\n",
    
       "cell_type": "code",
    
    schmittu's avatar
    schmittu committed
       "execution_count": null,
    
       "metadata": {},
       "outputs": [],
    
        "from sklearn.linear_model import LogisticRegression\n",
    
    schmittu's avatar
    schmittu committed
        "\n",
    
    schmittu's avatar
    schmittu committed
        "classifier = LogisticRegression(C=2)\n",
    
        "classifier.fit(input_features, labels)\n",
    
        "predicted_labels = classifier.predict(input_features)\n",
    
    schmittu's avatar
    schmittu committed
        "assert predicted_labels.shape == labels.shape\n",
    
        "print(len(labels), \"examples\")\n",
    
    schmittu's avatar
    schmittu committed
        "print(sum(predicted_labels == labels), \"labeled correctly\")\n",
        "print(sum(predicted_labels == labels) / len(labels) * 100, \"% labeled correctly\")"
    
       "metadata": {},
    
        "<div class=\"alert alert-block alert-warning\">\n",
    
    schmittu's avatar
    schmittu committed
        "<i class=\"fa fa-warning\"></i>&nbsp;<strong>Classifiers have hyper-parameters</strong>\n",
    
    schmittu's avatar
    schmittu committed
        "All classifiers have hyper-parameters, e.g. the `C` we have seen before. It is an incident that both, `LogisticRegression` and `SVC`, have parameter named `C`. Beyond that some classifiers have more than one parameter, e.g. `SVC` also has a parameter `gamma`. But more about these details later.\n",
    
       "metadata": {},
    
        "## Optional exercise"
    
       "metadata": {},
    
       "source": [
        "Load and inspect the cannonical Fisher's \"Iris\" data set, which is included in `scikit-learn`: see [docs for `sklearn.datasets.load_iris`](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_iris.html). What's conceptually diffferent?\n",
    
    schmittu's avatar
    schmittu committed
        "Inspect the data using scatter plots.\n",
        "\n",
    
        "Apply `LogisticRegression` or `SVC` classifiers. Is it easier or more difficult than classification of the beers data?\n",
    
    schmittu's avatar
    schmittu committed
      },
    
    schmittu's avatar
    schmittu committed
       "execution_count": null,
    
       "metadata": {},
       "outputs": [],
    
       "source": [
        "from sklearn.datasets import load_iris\n",
        "\n",
        "data = load_iris()\n",
        "\n",
        "# labels as text\n",
    
    schmittu's avatar
    schmittu committed
        "print(data.target_names)\n",
    
        "\n",
        "# (rows, columns) of the feature matrix:\n",
    
    schmittu's avatar
    schmittu committed
        "print(data.data.shape)"
    
    schmittu's avatar
    schmittu committed
       "execution_count": null,
    
       "metadata": {},
       "outputs": [],
    
       "source": [
        "# transform the scikit-learn data structure into a data frame:\n",
        "df = pd.DataFrame(data.data, columns=data.feature_names)\n",
    
    schmittu's avatar
    schmittu committed
        "\n",
        "# add new column\n",
    
        "df[\"class\"] = data.target\n",
        "df.head()"
    
    schmittu's avatar
    schmittu committed
       "execution_count": null,
    
       "metadata": {},
       "outputs": [],
    
        "# SOLUTION STARTS HERE"
    
    schmittu's avatar
    schmittu committed
       "execution_count": null,
    
       "metadata": {
        "tags": [
         "solution"
        ]
       },
       "outputs": [],
    
       "source": [
        "import seaborn as sns\n",
    
    schmittu's avatar
    schmittu committed
        "\n",
    
        "sns.set(style=\"ticks\")\n",
        "\n",
        "for_plot = df.copy()\n",
        "\n",
    
    schmittu's avatar
    schmittu committed
        "\n",
    
        "def transform_label(class_):\n",
        "    return data.target_names[class_]\n",
        "\n",
    
    schmittu's avatar
    schmittu committed
        "\n",
    
        "# seaborn does not work here if we use numeric values in the class\n",
        "# column, or strings which represent numbers. To fix this we\n",
        "# create textual class labels\n",
        "for_plot[\"class\"] = for_plot[\"class\"].apply(transform_label)\n",
    
    schmittu's avatar
    schmittu committed
        "sns.pairplot(for_plot, hue=\"class\", diag_kind=\"hist\");"
    
       "metadata": {
        "tags": [
         "solution"
        ]
       },
       "outputs": [],
    
    schmittu's avatar
    schmittu committed
       "source": [
    
        "features = df.iloc[:, :-1]\n",
        "labels = df.iloc[:, -1]\n",
    
    schmittu's avatar
    schmittu committed
        "\n",
    
        "# classifier = SVC()\n",
    
    schmittu's avatar
    schmittu committed
        "classifier = LogisticRegression(max_iter=200)\n",
    
        "classifier.fit(features, labels)\n",
    
    schmittu's avatar
    schmittu committed
        "\n",
    
        "predicted_labels = classifier.predict(features)\n",
        "\n",
    
    schmittu's avatar
    schmittu committed
        "assert predicted_labels.shape == labels.shape\n",
    
        "print(len(labels), \"examples\")\n",
        "print(sum(predicted_labels == labels), \"labeled correctly\")"
    
       "metadata": {},
    
    schmittu's avatar
    schmittu committed
        "Copyright (C) 2019-2022 ETH Zurich, SIS ID"
    
    chadhat's avatar
    chadhat committed
      },
      {
       "cell_type": "code",
       "execution_count": null,
       "metadata": {},
       "outputs": [],
       "source": []
    
    schmittu's avatar
    schmittu committed
      }
     ],
     "metadata": {
    
      "kernelspec": {
    
       "display_name": "Python 3 (ipykernel)",
    
       "language": "python",
    
       "name": "python3"
    
    schmittu's avatar
    schmittu committed
      "language_info": {
    
       "codemirror_mode": {
        "name": "ipython",
        "version": 3
       },
       "file_extension": ".py",
       "mimetype": "text/x-python",
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
    
       "version": "3.10.10"
    
    schmittu's avatar
    schmittu committed
      }
     },
     "nbformat": 4,
    
    schmittu's avatar
    schmittu committed
     "nbformat_minor": 4