diff --git a/02_classification.ipynb b/02_classification.ipynb index 06bd63bcfee78f4f27a21595cd8f080b41562fb6..268af885bf932631e5c29411ce299315eaa4b32d 100644 --- a/02_classification.ipynb +++ b/02_classification.ipynb @@ -700,7 +700,7 @@ " weight_x * x + weight_y * y = threshold\n", " \n", "\n", - "can be rearranged to the form `y = m * x n` and thus defines a line in 2D space. Points fulfilling\n", + "can be rearranged to the form `y = a * x + b` and thus defines a line in 2D space. Points fulfilling\n", "\n", " weight_x * x + weight_y * y < threshold\n", " \n", @@ -721,7 +721,7 @@ "\n", "We \n", "\n", - "1. define a decision line (weights and threshold),\n", + "1. define a decision line (using weights and threshold),\n", "2. create random 2D samples,\n", "3. compute scores for the samples,\n", "4. split points according to their score compared to the threshold,\n", @@ -809,7 +809,7 @@ "cell_type": "code", "execution_count": 10, "metadata": { - "scrolled": true + "scrolled": false }, "outputs": [ { diff --git a/03_overfitting_and_cross_validation.ipynb b/03_overfitting_and_cross_validation.ipynb index 2fc879894b49d69ab11f3e1378bf916a2248a050..ddc615d7abbe6e428d40b8918667b092c90f6600 100644 --- a/03_overfitting_and_cross_validation.ipynb +++ b/03_overfitting_and_cross_validation.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -113,7 +113,7 @@ "<IPython.core.display.HTML object>" ] }, - "execution_count": 1, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -149,7 +149,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": { "scrolled": true }, @@ -199,7 +199,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -217,7 +217,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -270,7 +270,7 @@ }, { 
"cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -313,7 +313,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -378,7 +378,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -452,7 +452,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -554,7 +554,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -717,7 +717,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -726,7 +726,7 @@ "(300, 5)" ] }, - "execution_count": 10, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -752,7 +752,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -802,7 +802,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "metadata": { "scrolled": true }, @@ -842,7 +842,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 27, "metadata": { "tags": [ "solution" @@ -853,11 +853,11 @@ "name": "stdout", "output_type": "stream", "text": [ - "2-fold accuracy score is 0.840 +/- 0.019\n", - "5-fold accuracy score is 0.844 +/- 0.045\n", - "10-fold accuracy score is 0.851 +/- 0.073\n", - "25-fold accuracy score is 0.857 +/- 0.106\n", - "50-fold accuracy score is 0.858 +/- 0.148\n", + " 2-fold accuracy score is 0.840 +/- 0.019\n", + " 5-fold accuracy score is 0.844 +/- 0.045\n", + " 10-fold accuracy score is 0.851 +/- 0.073\n", + " 25-fold accuracy score is 0.857 +/- 0.106\n", + " 50-fold accuracy score is 0.858 +/- 0.148\n", "150-fold accuracy score is 0.859 +/- 0.241\n" ] } @@ -884,7 +884,7 @@ " scores = cross_val_score(classifier, features, labels, scoring=\"accuracy\", cv=k)\n", " m = 
scores.mean()\n", " s = scores.std()\n", - " print(\"{:d}-fold accuracy score is {:.3f} +/- {:.3f}\".format(k, m, s))\n", + " print(\"{:3d}-fold accuracy score is {:.3f} +/- {:.3f}\".format(k, m, s))\n", "\n", "#\n", "# Q: What happens with the score?\n", @@ -919,7 +919,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 15, "metadata": { "tags": [ "solution" @@ -1021,7 +1021,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 26, "metadata": {}, "outputs": [ { @@ -1218,7 +1218,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Moreover, we introduce use of explicit speficiation of a cross-validation method: `StratifiedKFold` from `sklearn.model_selection`. This allows us to spilt data during cross validation in the same way as we did with `train_test_split`, i.e. a) with data shufflling before split, and b) perserving class-proportions of samples, " + "Moreover, we introduce the use of an explicit specification of a cross-validation method: `StratifiedKFold` from `sklearn.model_selection`. \n", + "\n", + "This allows us to split data during cross-validation in the same way as we did with `train_test_split`, i.e. \n", + "\n", + "a) with data shuffling before split, and \n", + "\n", + "b) preserving class-proportions of samples, " ] }, {