07_regression.ipynb

     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "warnings.filterwarnings('ignore', category=DeprecationWarning)\n",
    "from sklearn.model_selection import GridSearchCV\n",
    "\n",
    "search = GridSearchCV(p, param_grid, scoring=\"neg_median_absolute_error\", cv=4, n_jobs=4)\n",
    "\n",
    "search.fit(features, values)\n",
    "\n",
    "\n",
    "print(search.best_params_)\n",
    "eval_regression(search)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Exercise section\n",
    "\n",
    "- Play with the examples above and try different algorithms, metrics and pipelines.\n",
    "\n",
    "\n",
    "## Optional exercise\n",
    "\n",
    "- Split the dataset into one with `kind=\"sockeye\"` and one with `kind=\"atlantic\"` and build individual regression models for both. How does this approach compare to the results we got before ?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,\n",
      "         normalize=False)\n",
      "! FULL DATASET:  best_score    = -3.50\n",
      "  ATLANTIC    :  best_score    = -3.83\n",
      "  SOCKEYE     :  best_score    = -2.52\n",
      "! COMBINED    :  average_score = -3.17\n",
      "\n",
      "DecisionTreeRegressor(criterion='mse', max_depth=None, max_features=None,\n",
      "           max_leaf_nodes=None, min_impurity_decrease=0.0,\n",
      "           min_impurity_split=None, min_samples_leaf=1,\n",
      "           min_samples_split=2, min_weight_fraction_leaf=0.0,\n",
      "           presort=False, random_state=None, splitter='best')\n",
      "! FULL DATASET:  best_score    = -3.50\n",
      "  ATLANTIC    :  best_score    = -4.15\n",
      "  SOCKEYE     :  best_score    = -2.45\n",
      "! COMBINED    :  average_score = -3.30\n",
      "\n",
      "KernelRidge(alpha=1, coef0=1, degree=3, gamma=None, kernel='linear',\n",
      "      kernel_params=None)\n",
      "! FULL DATASET:  best_score    = -3.48\n",
      "  ATLANTIC    :  best_score    = -3.85\n",
      "  SOCKEYE     :  best_score    = -2.57\n",
      "! COMBINED    :  average_score = -3.21\n",
      "\n",
      "KernelRidge(alpha=1, coef0=1, degree=3, gamma=None, kernel='rbf',\n",
      "      kernel_params=None)\n",
      "! FULL DATASET:  best_score    = -3.37\n",
      "  ATLANTIC    :  best_score    = -4.21\n",
      "  SOCKEYE     :  best_score    = -2.29\n",
      "! COMBINED    :  average_score = -3.25\n",
      "\n",
      "SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,\n",
      "  gamma='auto_deprecated', kernel='rbf', max_iter=-1, shrinking=True,\n",
      "  tol=0.001, verbose=False)\n",
      "! FULL DATASET:  best_score    = -3.35\n",
      "  ATLANTIC    :  best_score    = -4.01\n",
      "  SOCKEYE     :  best_score    = -2.62\n",
      "! COMBINED    :  average_score = -3.32\n"
     ]
    }
   ],
   "source": [
    "#SOLUTION\n",
    "warnings.filterwarnings('ignore', category=DeprecationWarning)\n",
    "\n",
    "from sklearn.pipeline import make_pipeline\n",
    "from sklearn.preprocessing import StandardScaler, PolynomialFeatures\n",
    "from sklearn.kernel_ridge import KernelRidge\n",
    "from sklearn.linear_model import LinearRegression\n",
    "from sklearn.svm import SVR\n",
    "from sklearn.tree import DecisionTreeRegressor \n",
    "from sklearn.model_selection import cross_val_score\n",
    "from sklearn.decomposition import PCA\n",
    "\n",
    "\n",
    "sockey_indices = features[\"kind\"] == 1\n",
    "features_sockeye = features[sockey_indices]\n",
    "values_sockeye = values[sockey_indices]\n",
    "\n",
    "atlantic_indices = features[\"kind\"] == 0\n",
    "features_atlantic = features[atlantic_indices]\n",
    "values_atlantic = values[atlantic_indices]\n",
    "\n",
    "\n",
    "def eval_clf(clf):\n",
    "    print(clf)\n",
    "    p = make_pipeline(PolynomialFeatures(), PCA(), DecisionTreeRegressor())\n",
    "\n",
    "    param_grid = {'polynomialfeatures__degree': range(3, 12),\n",
    "                  'pca__n_components': range(1, 10),\n",
    "                 }\n",
    "\n",
    "    search = GridSearchCV(p, param_grid, scoring=\"neg_median_absolute_error\", cv=4, n_jobs=4)\n",
    "\n",
    "    search.fit(features, values)\n",
    "    print(\"! FULL DATASET:  best_score    = {:.2f}\".format(search.best_score_))\n",
    "    score_full = search.best_score_\n",
    "\n",
    "    search.fit(features_atlantic, values_atlantic)\n",
    "    print(\"  ATLANTIC    :  best_score    = {:.2f}\".format(search.best_score_))\n",
    "    score_atlantic = search.best_score_\n",
    "\n",
    "    search.fit(features_sockeye, values_sockeye)\n",
    "    print(\"  SOCKEYE     :  best_score    = {:.2f}\".format(search.best_score_))\n",
    "    score_sockeye = search.best_score_\n",
    "\n",
    "    print(\"! COMBINED    :  average_score = {:.2f}\".format((score_atlantic + score_sockeye) / 2.0))\n",
    "    \n",
    "    \n",
    "eval_clf(LinearRegression())\n",
    "print()\n",
    "\n",
    "eval_clf(DecisionTreeRegressor())\n",
    "print()\n",
    "\n",
    "eval_clf(KernelRidge())\n",
    "print()\n",
    "\n",
    "eval_clf(KernelRidge(kernel=\"rbf\"))\n",
    "print()\n",
    "\n",
    "eval_clf(SVR())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.2"
  },
  "latex_envs": {
   "LaTeX_envs_menu_present": true,
   "autoclose": false,
   "autocomplete": true,
   "bibliofile": "biblio.bib",
   "cite_by": "apalike",
   "current_citInitial": 1,
   "eqLabelWithNumbers": true,
   "eqNumInitial": 1,
   "hotkeys": {
    "equation": "Ctrl-E",
    "itemize": "Ctrl-I"
   },
   "labels_anchors": false,
   "latex_user_defs": false,
   "report_style_numbering": false,
   "user_envs_cfg": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}