From 453040d761368fde3ac965dcffc2a4bff2cfd83a Mon Sep 17 00:00:00 2001 From: Uwe Schmitt <uwe.schmitt@id.ethz.ch> Date: Mon, 24 Sep 2018 21:08:27 +0200 Subject: [PATCH] reformatted proposal --- .gitignore | 2 + content.md | 161 ------------------- machine_learning_workshop_proposal.ipynb | 195 +++++++++++++++++++++++ 3 files changed, 197 insertions(+), 161 deletions(-) create mode 100644 .gitignore delete mode 100644 content.md create mode 100644 machine_learning_workshop_proposal.ipynb diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..60815f0 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +.ipynb_checkpoints/ +venv* diff --git a/content.md b/content.md deleted file mode 100644 index 3eb413b..0000000 --- a/content.md +++ /dev/null @@ -1,161 +0,0 @@ -# Targeted audience - -- Researchers having no machine learning experience yet. -- Basic Python knowledge. -- Almost no math knowledge required. - -# Course structure - -- Two days workshop, 1.5 days workshop + .5 day working on own data / prepared data. -- Every part below includes a coding session using Jupyter notebooks. -- Coding sessions provide code frames which should be completed. -- We provide solutions. - - -# Day 1 - -## Part 0: Preparation - -- Quick basics matplotlib, numpy, pandas? - - -### Coding session - -- read dataframe from csv or excel sheet with beer features -- do some features vs features scatter plots - - -## Part 1: Introduction - -- What is machine learning ? -- What are features / samples / feature matrix ? -- Learning problems: supervised / unsupervised - - -### Code walkthrough: - - - Classification: linear SVM classifier or logistic regression example - - Clustering: scikit-learn example to find clusters. - - -## Part 2: classification - - Intention: demonstrate one / two simple examples of classifiers, also - introduce the concept of decision boundary - - - Introduction: some simple two dimensional examples incl. decision function. 
- - - Idea of linear classifier: - - simple linear classifier (linear SVM e.g.) - - beer example with some weights - - - Discuss code example with logistic regression for beer data, show weights - -### Coding session: - - - Change given code to use a linear SVM classifier - - Use different data set which can not be classified well with a linear classifier - - -## Part 3: accuracy, F1, ROC, ... - -Intention: accuracy is useful but has pitfalls - -- how to measure accuracy ? - - - confusion matrix - - accurarcy - - pitfalls for unbalanced data sets - e.g. diagnose HIV - - precision / recall - -### Coding session - -- Evaluate accuracy of linear beer classifier from latest section -- Determine precision / recall - - -## Part 4: underfitting/overfitting - -classifiers / regressors have parameters / degrees of freedom. - -- underfitting: linear classifier on nonlinear problem - -- overfitting: - - - features have actual noise, or not enough information: orchid example in 2d. elevate to 3d using another feature. - - polynome of degree 5 to fit points on a line + noise - - points in a circle: draw very exact boundary line - -- how to check underfitting / overfitting ? - - - measure accuracy or other metric on test dataset - - cross validation - - -### Coding session: - -- How to do cross validation with scikit-learn -- run cross validation on classifier for beer data - - -## Part 5: pipelines / parameter tuning with scikit-learn - -- Scikit learn API incl. summary of what we have seen up to now. -- pipelines, preprocessing (scaler, PCA) -- cross validation -- Hyper parameter tuning: grid search / random search. - -### Coding session - -- examples - - -# DAY 2 - -## Part 6: Overview classifiers - -- Nearest neighbours -- SVMs - - demo for RBF: different parameters influence on decision line -- Random forests -- Gradient Tree Boosting - - -### Coding session - -- Prepare examples for 2d classification problems incl. visualization of different - decision surfaces. 
- -- Play with different classifiers on beer data - -## Part 7: Regression - -- What are differences compared to classification: output, how to measure accuracy, ... - -- Example: fit polynomial, examples for underfitting and overfitting - - -### Coding session - -Introduce movie data set, learn SVR or other regressor on this data set. - - -## Part 8: Introduction neural networks - - -- Overview of the field -- Introduction to feed forward neural networks -- Demo Keras - -### Coding Session - -- keras reuse network and play with it. - - -## Workshop - -- assist to setup the workshop material on own computer. -- provide example problems if attendees don't bring own data. - - diff --git a/machine_learning_workshop_proposal.ipynb b/machine_learning_workshop_proposal.ipynb new file mode 100644 index 0000000..defa9c2 --- /dev/null +++ b/machine_learning_workshop_proposal.ipynb @@ -0,0 +1,195 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Introduction to machine-learning with Python\n", + "\n", + "\n", + "\n", + "### Targeted audience\n", + "\n", + "- Researchers having no machine learning experience yet.\n", + "- Basic Python knowledge.\n", + "- Almost no math knowledge required.\n", + "\n", + "### Course structure\n", + "\n", + "- Two days workshop, 1.5 days workshop + .5 day working on own data / prepared data.\n", + "- Every part below includes a coding session using Jupyter notebooks.\n", + "- Coding sessions provide code frames which should be completed.\n", + "- We provide solutions.\n", + "\n", + "\n", + "## Day 1\n", + "\n", + "### Part 0: Preparation\n", + "\n", + "- Quick basics matplotlib, numpy, pandas?\n", + "\n", + "\n", + "#### Coding session\n", + "\n", + "- read dataframe from csv or excel sheet with beer features\n", + "- do some features vs features scatter plots\n", + "\n", + "\n", + "### Part 1: Introduction\n", + "\n", + "- What is machine learning ?\n", + "- What are features / samples / feature matrix ?\n", 
+ "- Learning problems: supervised / unsupervised\n", + "\n", + "\n", + "#### Code walkthrough:\n", + "\n", + " - Classification: linear SVM classifier or logistic regression example\n", + " - Clustering: scikit-learn example to find clusters.\n", + "\n", + "\n", + "### Part 2: classification\n", + "\n", + " Intention: demonstrate one / two simple examples of classifiers, also\n", + " introduce the concept of decision boundary\n", + "\n", + " - Introduction: some simple two dimensional examples incl. decision function.\n", + "\n", + " - Idea of linear classifier:\n", + " - simple linear classifier (linear SVM e.g.)\n", + " - beer example with some weights\n", + "\n", + " - Discuss code example with logistic regression for beer data, show weights\n", + "\n", + "#### Coding session:\n", + "\n", + " - Change given code to use a linear SVM classifier\n", + " - Use different data set which can not be classified well with a linear classifier\n", + "\n", + "\n", + "### Part 3: accuracy, F1, ROC, ...\n", + "\n", + "Intention: accuracy is useful but has pitfalls\n", + "\n", + "- how to measure accuracy ?\n", + "\n", + " - confusion matrix\n", + " - accuracy\n", + " - pitfalls for unbalanced data sets\n", + " e.g. diagnose HIV\n", + " - precision / recall\n", + "\n", + "#### Coding session\n", + "\n", + "- Evaluate accuracy of linear beer classifier from the previous section\n", + "- Determine precision / recall\n", + "\n", + "\n", + "### Part 4: underfitting/overfitting\n", + "\n", + "classifiers / regressors have parameters / degrees of freedom.\n", + "\n", + "- underfitting: linear classifier on nonlinear problem\n", + "\n", + "- overfitting:\n", + "\n", + " - features have actual noise, or not enough information: orchid example in 2d. 
elevate to 3d using another feature.\n", + " - polynomial of degree 5 to fit points on a line + noise\n", + " - points in a circle: draw very exact boundary line\n", + "\n", + "- how to check underfitting / overfitting ?\n", + "\n", + " - measure accuracy or other metric on test dataset\n", + " - cross validation\n", + "\n", + "\n", + "#### Coding session:\n", + "\n", + "- How to do cross validation with scikit-learn\n", + "- run cross validation on classifier for beer data\n", + "\n", + "\n", + "### Part 5: pipelines / parameter tuning with scikit-learn\n", + "\n", + "- scikit-learn API incl. summary of what we have seen up to now.\n", + "- pipelines, preprocessing (scaler, PCA)\n", + "- cross validation\n", + "- Hyperparameter tuning: grid search / random search.\n", + "\n", + "#### Coding session\n", + "\n", + "- examples\n", + "\n", + "\n", + "## Day 2\n", + "\n", + "### Part 6: Overview classifiers\n", + "\n", + "- Nearest neighbours\n", + "- SVMs\n", + " - demo for RBF: different parameters influence on decision line\n", + "- Random forests\n", + "- Gradient Tree Boosting\n", + "\n", + "\n", + "#### Coding session\n", + "\n", + "- Prepare examples for 2d classification problems incl. 
visualization of different\n", + " decision surfaces.\n", + "\n", + "- Play with different classifiers on beer data\n", + "\n", + "### Part 7: Regression\n", + "\n", + "- What are differences compared to classification: output, how to measure accuracy, ...\n", + "\n", + "- Example: fit polynomial, examples for underfitting and overfitting\n", + "\n", + "\n", + "#### Coding session\n", + "\n", + "Introduce movie data set, learn SVR or other regressor on this data set.\n", + "\n", + "\n", + "### Part 8: Introduction to neural networks\n", + "\n", + "\n", + "- Overview of the field\n", + "- Introduction to feed-forward neural networks\n", + "- Demo Keras\n", + "\n", + "#### Coding session\n", + "\n", + "- Keras: reuse an existing network and play with it.\n", + "\n", + "\n", + "## Workshop\n", + "\n", + "- assist with setting up the workshop material on attendees' own computers.\n", + "- provide example problems if attendees don't bring their own data.\n", + "\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} -- GitLab