Skip to content
Snippets Groups Projects
07_regression.ipynb 473 KiB
Newer Older
  • Learn to ignore specific revisions
  •      },
         "output_type": "display_data"
        }
       ],
       "source": [
    
        "warnings.filterwarnings('ignore', category=DeprecationWarning)\n",
    
    schmittu's avatar
    schmittu committed
        "from sklearn.model_selection import GridSearchCV\n",
        "\n",
        "search = GridSearchCV(p, param_grid, scoring=\"neg_median_absolute_error\", cv=4, n_jobs=4)\n",
        "\n",
        "search.fit(features, values)\n",
        "\n",
        "\n",
        "print(search.best_params_)\n",
    
        "eval_regression(search)"
       ]
      },
      {
       "cell_type": "markdown",
       "metadata": {},
       "source": [
        "# Exercise section\n",
        "\n",
    
    schmittu's avatar
    schmittu committed
        "- Play with the examples above and try different algorithms, metrics and pipelines.\n",
    
        "\n",
        "\n",
        "## Optional exercise\n",
        "\n",
    
    schmittu's avatar
    schmittu committed
        "- Split the dataset into one subset with `kind=\"sockeye\"` and one with `kind=\"atlantic\"`, and build an individual regression model for each. How does this approach compare to the results we got before?"
    
       "execution_count": 15,
    
    schmittu's avatar
    schmittu committed
       "metadata": {
        "scrolled": true
       },
       "outputs": [
        {
         "name": "stdout",
         "output_type": "stream",
         "text": [
          "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,\n",
          "         normalize=False)\n",
    
          "! FULL DATASET:  best_score    = -3.50\n",
          "  ATLANTIC    :  best_score    = -3.83\n",
          "  SOCKEYE     :  best_score    = -2.52\n",
          "! COMBINED    :  average_score = -3.17\n",
    
    schmittu's avatar
    schmittu committed
          "\n",
          "DecisionTreeRegressor(criterion='mse', max_depth=None, max_features=None,\n",
          "           max_leaf_nodes=None, min_impurity_decrease=0.0,\n",
          "           min_impurity_split=None, min_samples_leaf=1,\n",
          "           min_samples_split=2, min_weight_fraction_leaf=0.0,\n",
          "           presort=False, random_state=None, splitter='best')\n",
    
          "! FULL DATASET:  best_score    = -3.50\n",
          "  ATLANTIC    :  best_score    = -4.15\n",
          "  SOCKEYE     :  best_score    = -2.45\n",
          "! COMBINED    :  average_score = -3.30\n",
    
    schmittu's avatar
    schmittu committed
          "\n",
          "KernelRidge(alpha=1, coef0=1, degree=3, gamma=None, kernel='linear',\n",
          "      kernel_params=None)\n",
    
          "! FULL DATASET:  best_score    = -3.48\n",
          "  ATLANTIC    :  best_score    = -3.85\n",
          "  SOCKEYE     :  best_score    = -2.57\n",
          "! COMBINED    :  average_score = -3.21\n",
    
    schmittu's avatar
    schmittu committed
          "\n",
          "KernelRidge(alpha=1, coef0=1, degree=3, gamma=None, kernel='rbf',\n",
          "      kernel_params=None)\n",
    
          "! FULL DATASET:  best_score    = -3.37\n",
          "  ATLANTIC    :  best_score    = -4.21\n",
          "  SOCKEYE     :  best_score    = -2.29\n",
          "! COMBINED    :  average_score = -3.25\n",
    
    schmittu's avatar
    schmittu committed
          "\n",
          "SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,\n",
          "  gamma='auto_deprecated', kernel='rbf', max_iter=-1, shrinking=True,\n",
          "  tol=0.001, verbose=False)\n",
    
          "! FULL DATASET:  best_score    = -3.35\n",
          "  ATLANTIC    :  best_score    = -4.01\n",
          "  SOCKEYE     :  best_score    = -2.62\n",
          "! COMBINED    :  average_score = -3.32\n"
    
    schmittu's avatar
    schmittu committed
         ]
        }
       ],
       "source": [
        "#SOLUTION\n",
    
        "warnings.filterwarnings('ignore', category=DeprecationWarning)\n",
    
    schmittu's avatar
    schmittu committed
        "\n",
        "from sklearn.pipeline import make_pipeline\n",
        "from sklearn.preprocessing import StandardScaler, PolynomialFeatures\n",
        "from sklearn.kernel_ridge import KernelRidge\n",
        "from sklearn.linear_model import LinearRegression\n",
        "from sklearn.svm import SVR\n",
        "from sklearn.tree import DecisionTreeRegressor \n",
        "from sklearn.model_selection import cross_val_score\n",
        "from sklearn.decomposition import PCA\n",
        "\n",
        "\n",
        "# Boolean row masks over the \"kind\" column: 1 selects sockeye, 0 selects atlantic.\n",
        "kind_column = features[\"kind\"]\n",
        "sockey_indices = kind_column == 1\n",
        "atlantic_indices = kind_column == 0\n",
        "\n",
        "# Apply each mask to the features and to the target values alike.\n",
        "features_sockeye, values_sockeye = features[sockey_indices], values[sockey_indices]\n",
        "features_atlantic, values_atlantic = features[atlantic_indices], values[atlantic_indices]\n",
        "\n",
        "\n",
        "def eval_clf(clf):\n",
        "    \"\"\"Grid-search a PolynomialFeatures -> PCA -> clf pipeline and print its scores.\n",
        "\n",
        "    The same search is fitted on the full dataset, on the atlantic subset and\n",
        "    on the sockeye subset. The best cross-validated score of each fit is\n",
        "    printed, followed by the unweighted mean of the two per-kind scores.\n",
        "    Scores are negated median absolute errors, so values closer to zero are better.\n",
        "\n",
        "    Reads the notebook-level variables features / values and their\n",
        "    *_atlantic / *_sockeye counterparts; returns nothing.\n",
        "    \"\"\"\n",
        "    print(clf)\n",
        "    # The regressor under test has to be the last pipeline step. A hard-coded\n",
        "    # DecisionTreeRegressor() here would make every call evaluate the same model.\n",
        "    p = make_pipeline(PolynomialFeatures(), PCA(), clf)\n",
        "\n",
        "    # Keys follow make_pipeline's naming: lower-cased class name + '__' + parameter.\n",
        "    param_grid = {'polynomialfeatures__degree': range(3, 12),\n",
        "                  'pca__n_components': range(1, 10),\n",
        "                 }\n",
        "\n",
        "    search = GridSearchCV(p, param_grid, scoring=\"neg_median_absolute_error\", cv=4, n_jobs=4)\n",
        "\n",
        "    # Each fit() call overwrites best_score_, so it is saved right after every fit.\n",
        "    search.fit(features, values)\n",
        "    print(\"! FULL DATASET:  best_score    = {:.2f}\".format(search.best_score_))\n",
        "    score_full = search.best_score_\n",
        "\n",
        "    search.fit(features_atlantic, values_atlantic)\n",
        "    print(\"  ATLANTIC    :  best_score    = {:.2f}\".format(search.best_score_))\n",
        "    score_atlantic = search.best_score_\n",
        "\n",
        "    search.fit(features_sockeye, values_sockeye)\n",
        "    print(\"  SOCKEYE     :  best_score    = {:.2f}\".format(search.best_score_))\n",
        "    score_sockeye = search.best_score_\n",
        "\n",
        "    print(\"! COMBINED    :  average_score = {:.2f}\".format((score_atlantic + score_sockeye) / 2.0))\n",
        "    \n",
        "    \n",
        "# Evaluate every candidate regressor; a blank line separates consecutive reports.\n",
        "candidates = [\n",
        "    LinearRegression(),\n",
        "    DecisionTreeRegressor(),\n",
        "    KernelRidge(),\n",
        "    KernelRidge(kernel=\"rbf\"),\n",
        "    SVR(),\n",
        "]\n",
        "for position, candidate in enumerate(candidates):\n",
        "    if position > 0:\n",
        "        print()\n",
        "    eval_clf(candidate)"
       ]
    
      },
      {
       "cell_type": "code",
       "execution_count": null,
       "metadata": {},
       "outputs": [],
       "source": []
    
      }
     ],
     "metadata": {
      "kernelspec": {
       "display_name": "Python 3",
       "language": "python",
       "name": "python3"
      },
      "language_info": {
       "codemirror_mode": {
        "name": "ipython",
        "version": 3
       },
       "file_extension": ".py",
       "mimetype": "text/x-python",
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
       "version": "3.7.2"
      },
      "latex_envs": {
       "LaTeX_envs_menu_present": true,
       "autoclose": false,
       "autocomplete": true,
       "bibliofile": "biblio.bib",
       "cite_by": "apalike",
       "current_citInitial": 1,
       "eqLabelWithNumbers": true,
       "eqNumInitial": 1,
       "hotkeys": {
        "equation": "Ctrl-E",
        "itemize": "Ctrl-I"
       },
       "labels_anchors": false,
       "latex_user_defs": false,
       "report_style_numbering": false,
       "user_envs_cfg": false
      }
     },
     "nbformat": 4,
     "nbformat_minor": 2
    }