Skip to content
Snippets Groups Projects
decision_boundaries.ipynb 69.8 KiB
Newer Older
  • Learn to ignore specific revisions
  • schmittu's avatar
    schmittu committed
    {
     "cells": [
      {
       "cell_type": "code",
       "execution_count": 29,
       "metadata": {},
       "outputs": [],
       "source": [
        "import numpy as np\n",
        "import matplotlib\n",
        "import matplotlib.pyplot as plt\n",
        "%matplotlib inline"
       ]
      },
      {
       "cell_type": "code",
       "execution_count": 3,
       "metadata": {},
       "outputs": [
        {
         "data": {
          "application/javascript": [
           "// avoids scrollboxes for plots\n",
           "IPython.OutputArea.prototype._should_scroll = function(lines) {\n",
           "    return false;\n",
           "}"
          ],
          "text/plain": [
           "<IPython.core.display.Javascript object>"
          ]
         },
         "metadata": {},
         "output_type": "display_data"
        }
       ],
       "source": [
        "%%javascript\n",
        "// Avoids scroll boxes for plots: replace the output area's _should_scroll\n",
        "// function with one that returns false no matter how many lines it is given.\n",
        "// NOTE(review): relies on a global `IPython` object being available in the\n",
        "// notebook front end - confirm this holds for the front end you use.\n",
        "IPython.OutputArea.prototype._should_scroll = function(lines) {\n",
        "    return false;\n",
        "}"
       ]
      },
      {
       "cell_type": "code",
       "execution_count": 4,
       "metadata": {},
       "outputs": [],
       "source": [
        "def _unit_square_grid(N):\n",
        "    \"\"\"Return an (N * N, 2) array of grid points covering [-1, 1] x [-1, 1].\"\"\"\n",
        "    axis = np.linspace(-1, 1, N)\n",
        "    return np.array(np.meshgrid(axis, axis)).T.reshape(-1, 2)\n",
        "\n",
        "\n",
        "def plot_decision_surface(model, marker=\".\", N=400):\n",
        "    \"\"\"Plot the exact decision surface of a labelling function.\n",
        "\n",
        "    Opens a new figure and draws an N x N grid over [-1, 1] x [-1, 1]:\n",
        "    points labelled true by `model` are green, all other points are red.\n",
        "\n",
        "    Parameters\n",
        "    ----------\n",
        "    model : callable\n",
        "        Maps an (n, 2) array of points to n boolean (or 0/1) labels.\n",
        "    marker : str\n",
        "        Matplotlib marker appended to the colour code.\n",
        "    N : int\n",
        "        Number of grid points per axis.\n",
        "    \"\"\"\n",
        "    points = _unit_square_grid(N)\n",
        "    # Cast to bool: integer 0/1 labels would otherwise be used as row indices\n",
        "    # and `~` would turn them into -1/-2 instead of negating them.\n",
        "    classes = np.asarray(model(points)).astype(bool)\n",
        "    positives, negatives = points[classes], points[~classes]\n",
        "\n",
        "    plt.figure(figsize=(6, 6))\n",
        "    plt.plot(positives[:, 0], positives[:, 1], \"g\" + marker, markersize=1, alpha=.05)\n",
        "    plt.plot(negatives[:, 0], negatives[:, 1], \"r\" + marker, markersize=1, alpha=.05)\n",
        "\n",
        "\n",
        "def plot_classifier_surface(clf, points=None, marker=\"x\", N=200):\n",
        "    \"\"\"Draw the decision lines of a fitted classifier into the current figure.\n",
        "\n",
        "    Parameters\n",
        "    ----------\n",
        "    clf : object with a `predict(points)` method\n",
        "        Classifier whose predictions are contoured.\n",
        "    points : array of shape (n * n, 2), optional\n",
        "        Square grid of points to evaluate, in the row order produced by\n",
        "        `np.array(np.meshgrid(x, y)).T.reshape(-1, 2)`. Defaults to an\n",
        "        N x N grid over [-1, 1] x [-1, 1].\n",
        "    marker : str\n",
        "        Unused; kept so that existing calls keep working.\n",
        "    N : int\n",
        "        Grid points per axis of the default grid. Ignored when `points` is\n",
        "        given, because the grid size is then derived from `len(points)`.\n",
        "\n",
        "    Raises\n",
        "    ------\n",
        "    ValueError\n",
        "        If `points` does not contain a square number of rows.\n",
        "    \"\"\"\n",
        "    if points is None:\n",
        "        points = _unit_square_grid(N)\n",
        "    else:\n",
        "        points = np.asarray(points)\n",
        "        # Derive the grid size from the data instead of trusting N, which\n",
        "        # broke the reshape below for any grid that was not exactly N x N.\n",
        "        N = int(round(np.sqrt(len(points))))\n",
        "        if N * N != len(points):\n",
        "            raise ValueError(\"points must form a square grid, got %d rows\" % len(points))\n",
        "\n",
        "    classes = np.asarray(clf.predict(points)).astype(float)\n",
        "    class_values = np.unique(classes)\n",
        "    # A contour level equal to the smallest or largest data value lies outside\n",
        "    # the range matplotlib draws, so levels [0.0, 1.0] produced no lines at all.\n",
        "    # Put one level half way between each pair of neighbouring class values.\n",
        "    levels = ((class_values[:-1] + class_values[1:]) / 2).tolist()\n",
        "    print(\"levels in contour plot:\", levels)\n",
        "    if not levels:\n",
        "        # Only one class was predicted: there is no decision line to draw.\n",
        "        return\n",
        "\n",
        "    grid = (N, N)\n",
        "    plt.contour(points[:, 0].reshape(grid), points[:, 1].reshape(grid), classes.reshape(grid), levels=levels, alpha=.5)"
       ]
      },
      {
       "cell_type": "code",
       "execution_count": 31,
       "metadata": {
        "scrolled": true
       },
       "outputs": [
        {
         "data": {
          "image/png": "\n",
          "text/plain": [
           "<Figure size 432x432 with 1 Axes>"
          ]
         },
         "metadata": {
          "needs_background": "light"
         },
         "output_type": "display_data"
        }
       ],
       "source": [
        "# some functions for creating labels\n",
        "from functools import wraps\n",
        "\n",
        "\n",
        "def np_array_adapter(function):\n",
        "    \"\"\"Turn `function(x, y)` into a function of one (n, 2) array of points.\n",
        "\n",
        "    The returned wrapper splits its argument into its two columns and passes\n",
        "    them on as `x` and `y`.\n",
        "    \"\"\"\n",
        "    @wraps(function)  # keep the name and docstring of the decorated function\n",
        "    def wrapped(p):\n",
        "        # accept lists of pairs as well as arrays\n",
        "        p = np.asarray(p)\n",
        "        # check ndim first so 1-d input gets this message instead of an IndexError\n",
        "        assert p.ndim == 2 and p.shape[1] == 2, \"matrix must have two columns\"\n",
        "        x, y = p[:, 0], p[:, 1]\n",
        "        return function(x, y)\n",
        "    return wrapped\n",
        "\n",
        "\n",
        "@np_array_adapter\n",
        "def xor(x, y):\n",
        "    \"\"\"True where x and y are both non-zero and have the same sign.\"\"\"\n",
        "    return x * y > 0\n",
        "\n",
        "\n",
        "@np_array_adapter\n",
        "def circle(x, y):\n",
        "    \"\"\"True outside the origin-centred circle x^2 + y^2 = .7.\"\"\"\n",
        "    return x ** 2 + y ** 2 > .7\n",
        "\n",
        "\n",
        "@np_array_adapter\n",
        "def ellipsis(x, y):\n",
        "    \"\"\"True inside the ellipse 3 x^2 + .7 y^2 + 2 x y = .3.\"\"\"\n",
        "    return 3 * x ** 2 + .7 * y ** 2 + 2 * x * y < .3\n",
        "\n",
        "\n",
        "@np_array_adapter\n",
        "def two_circles(x, y):\n",
        "    \"\"\"True inside either of two discs centred at (.3, .3) and (-.4, -.4).\"\"\"\n",
        "    first_disc = (x - .3) ** 2 + (y - .3) ** 2 < .15\n",
        "    second_disc = (x + .4) ** 2 + (y + .4) ** 2 < .25\n",
        "    return np.logical_or(first_disc, second_disc)\n",
        "\n",
        "\n",
        "plot_decision_surface(xor)"
       ]
      },
      {
       "cell_type": "code",
       "execution_count": 44,
       "metadata": {},
       "outputs": [],
       "source": [
        "from sklearn.linear_model import LogisticRegression\n",
        "from sklearn.svm import SVC\n",
        "from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier\n",
        "from sklearn.tree import DecisionTreeClassifier\n",
        "from sklearn.naive_bayes import GaussianNB, MultinomialNB, BernoulliNB\n",
        "from sklearn.metrics import accuracy_score\n",
        "from sklearn.model_selection import cross_val_score"
       ]
      },
      {
       "cell_type": "code",
       "execution_count": 54,
       "metadata": {},
       "outputs": [
        {
         "name": "stdout",
         "output_type": "stream",
         "text": [
          "accuracy is 0.985\n",
          "crossval mean score is 0.8249921862791062\n",
          "levels in contour plot: [0.0, 1.0]\n"
         ]
        },
        {
         "name": "stderr",
         "output_type": "stream",
         "text": [
          "/Users/uweschmitt/Projects/machinelearning-introduction-workshop/venv3.6/lib/python3.6/site-packages/matplotlib/contour.py:1230: UserWarning: No contour levels were found within the data range.\n",
          "  warnings.warn(\"No contour levels were found\"\n"
         ]
        },
        {
         "data": {
          "image/png": "\n",
          "text/plain": [
           "<Figure size 432x432 with 1 Axes>"
          ]
         },
         "metadata": {
          "needs_background": "light"
         },
         "output_type": "display_data"
        }
       ],
       "source": [
        "# Exactly one classifier is active; swap the comment marker to compare\n",
        "# the decision lines of the other classifiers.\n",
        "# clf = LogisticRegression()\n",
        "clf = SVC(C=10, gamma=10)\n",
        "# clf = RandomForestClassifier()\n",
        "# clf = DecisionTreeClassifier(max_depth=19)\n",
        "# clf = GradientBoostingClassifier()\n",
        "\n",
        "# training data: an N x N grid of points labelled by the exact model\n",
        "N = 20\n",
        "x, y = np.linspace(-1, 1, N), np.linspace(-1, 1, N)\n",
        "points = np.array(np.meshgrid(x, y)).T.reshape(-1, 2)\n",
        "\n",
        "model = two_circles\n",
        "classes = np.array(model(points))\n",
        "\n",
        "clf.fit(points, classes)\n",
        "\n",
        "# note: this accuracy is measured on the same points the classifier was fitted on\n",
        "print(\"accuracy is\", accuracy_score(classes, clf.predict(points)))\n",
        "scores = cross_val_score(clf, points, classes, cv=5)\n",
        "print(\"crossval mean score is\", np.mean(scores))\n",
        "\n",
        "plot_decision_surface(model)\n",
        "\n",
        "plt.plot(points[:, 0], points[:, 1], \"k.\", markersize=2)\n",
        "plt.title(\"dots are feature points\\nbg color is exact decision surface\\nlines are decision lines from clf\")\n",
        "\n",
        "plot_classifier_surface(clf)"
       ]
      },
      {
       "cell_type": "code",
       "execution_count": null,
       "metadata": {},
       "outputs": [],
       "source": []
      }
     ],
     "metadata": {
      "kernelspec": {
       "display_name": "Python 3",
       "language": "python",
       "name": "python3"
      },
      "language_info": {
       "codemirror_mode": {
        "name": "ipython",
        "version": 3
       },
       "file_extension": ".py",
       "mimetype": "text/x-python",
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
       "version": "3.6.6"
      }
     },
     "nbformat": 4,
     "nbformat_minor": 2
    }