07_regression.ipynb

    "\n",
    "- Split the dataset into two subsets, one with `kind=\"sockeye\"` and one with `kind=\"atlantic\"`, and build an individual regression model for each. How does this approach compare to the results we got before?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "scrolled": true,
    "tags": [
     "solution"
    ]
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,\n",
      "         normalize=False)\n",
      "ONE MODEL       :  best_score    = -3.47\n",
      "  ATLANTIC ONLY :  best_score    = -4.30\n",
      "  SOCKEYE  ONLY :  best_score    = -2.62\n",
      "BOTH COMBINED   :  average_score = -3.46\n",
      "\n",
      "DecisionTreeRegressor(criterion='mse', max_depth=None, max_features=None,\n",
      "           max_leaf_nodes=None, min_impurity_decrease=0.0,\n",
      "           min_impurity_split=None, min_samples_leaf=1,\n",
      "           min_samples_split=2, min_weight_fraction_leaf=0.0,\n",
      "           presort=False, random_state=None, splitter='best')\n",
      "ONE MODEL       :  best_score    = -3.42\n",
      "  ATLANTIC ONLY :  best_score    = -3.97\n",
      "  SOCKEYE  ONLY :  best_score    = -2.62\n",
      "BOTH COMBINED   :  average_score = -3.29\n",
      "\n",
      "KernelRidge(alpha=1, coef0=1, degree=3, gamma=None, kernel='linear',\n",
      "      kernel_params=None)\n",
      "ONE MODEL       :  best_score    = -3.37\n",
      "  ATLANTIC ONLY :  best_score    = -4.18\n",
      "  SOCKEYE  ONLY :  best_score    = -2.34\n",
      "BOTH COMBINED   :  average_score = -3.26\n",
      "\n",
      "KernelRidge(alpha=1, coef0=1, degree=3, gamma=None, kernel='rbf',\n",
      "      kernel_params=None)\n",
      "ONE MODEL       :  best_score    = -3.50\n",
      "  ATLANTIC ONLY :  best_score    = -3.83\n",
      "  SOCKEYE  ONLY :  best_score    = -2.46\n",
      "BOTH COMBINED   :  average_score = -3.14\n",
      "\n",
      "SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,\n",
      "  gamma='auto_deprecated', kernel='rbf', max_iter=-1, shrinking=True,\n",
      "  tol=0.001, verbose=False)\n",
      "ONE MODEL       :  best_score    = -3.27\n",
      "  ATLANTIC ONLY :  best_score    = -4.07\n",
      "  SOCKEYE  ONLY :  best_score    = -2.47\n",
      "BOTH COMBINED   :  average_score = -3.27\n",
      "\n"
     ]
    }
   ],
   "source": [
    "from sklearn.pipeline import make_pipeline\n",
    "from sklearn.preprocessing import StandardScaler, PolynomialFeatures\n",
    "from sklearn.kernel_ridge import KernelRidge\n",
    "from sklearn.linear_model import LinearRegression\n",
    "from sklearn.svm import SVR\n",
    "from sklearn.tree import DecisionTreeRegressor \n",
    "from sklearn.model_selection import cross_val_score\n",
    "from sklearn.decomposition import PCA\n",
    "\n",
    "\n",
    "# Boolean row masks on the integer-coded `kind` column: this cell treats\n",
    "# rows with kind == 1 as the sockeye subset and kind == 0 as the atlantic subset.\n",
    "sockey_indices = features[\"kind\"] == 1\n",
    "atlantic_indices = features[\"kind\"] == 0\n",
    "\n",
    "# Per-kind feature tables together with their matching target values.\n",
    "features_sockeye, values_sockeye = features[sockey_indices], values[sockey_indices]\n",
    "features_atlantic, values_atlantic = features[atlantic_indices], values[atlantic_indices]\n",
    "\n",
    "\n",
    "def eval_clf(clf):\n",
    "    \"\"\"Grid-search a PolynomialFeatures -> PCA -> ``clf`` pipeline and print scores.\n",
    "\n",
    "    The same grid search is fitted three times: on the full data set\n",
    "    (``features`` / ``values``), on the atlantic subset and on the sockeye\n",
    "    subset. After each fit the best cross-validated score is printed, and\n",
    "    finally the plain (unweighted) mean of the two per-kind scores.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    clf : estimator\n",
    "        Unfitted scikit-learn regressor used as the final pipeline step.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    None\n",
    "        Results are only printed.\n",
    "    \"\"\"\n",
    "    print(clf)\n",
    "    # The estimator under test must be the last pipeline step. It used to be\n",
    "    # hard-coded to DecisionTreeRegressor(), so every call evaluated the same\n",
    "    # model regardless of the `clf` argument.\n",
    "    pipeline = make_pipeline(PolynomialFeatures(), PCA(), clf)\n",
    "\n",
    "    # Only the preprocessing steps are tuned; `clf` keeps its own settings.\n",
    "    param_grid = {\n",
    "        \"polynomialfeatures__degree\": range(3, 12),\n",
    "        \"pca__n_components\": range(1, 10),\n",
    "    }\n",
    "\n",
    "    # Scores are negated median absolute errors, so values closer to 0 are better.\n",
    "    search = GridSearchCV(pipeline, param_grid, scoring=\"neg_median_absolute_error\", cv=4, n_jobs=4)\n",
    "\n",
    "    search.fit(features, values)\n",
    "    print(\"ONE MODEL       :  best_score    = {:.2f}\".format(search.best_score_))\n",
    "\n",
    "    search.fit(features_atlantic, values_atlantic)\n",
    "    score_atlantic = search.best_score_\n",
    "    print(\"  ATLANTIC ONLY :  best_score    = {:.2f}\".format(score_atlantic))\n",
    "\n",
    "    search.fit(features_sockeye, values_sockeye)\n",
    "    score_sockeye = search.best_score_\n",
    "    print(\"  SOCKEYE  ONLY :  best_score    = {:.2f}\".format(score_sockeye))\n",
    "\n",
    "    # Unweighted mean of the two subset scores (subset sizes are not taken into account).\n",
    "    print(\"BOTH COMBINED   :  average_score = {:.2f}\".format((score_atlantic + score_sockeye) / 2.0))\n",
    "    print()\n",
    "    \n",
    "    \n",
    "# Run the same comparison for every candidate regressor, in this order.\n",
    "for regressor in (\n",
    "    LinearRegression(),\n",
    "    DecisionTreeRegressor(),\n",
    "    KernelRidge(),\n",
    "    KernelRidge(kernel=\"rbf\"),\n",
    "    SVR(),\n",
    "):\n",
    "    eval_clf(regressor)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.2"
  },
  "latex_envs": {
   "LaTeX_envs_menu_present": true,
   "autoclose": false,
   "autocomplete": true,
   "bibliofile": "biblio.bib",
   "cite_by": "apalike",
   "current_citInitial": 1,
   "eqLabelWithNumbers": true,
   "eqNumInitial": 1,
   "hotkeys": {
    "equation": "Ctrl-E",
    "itemize": "Ctrl-I"
   },
   "labels_anchors": false,
   "latex_user_defs": false,
   "report_style_numbering": false,
   "user_envs_cfg": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}