Commit 1afed144 by 20200318086

hw2 submitted

course-info @ 0458c1ae
Subproject commit 0458c1ae373a8cb8c4b01e42b6689046dfe42eeb
{
"cells": [],
"metadata": {},
"nbformat": 4,
"nbformat_minor": 4
}
{
"cells": [
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [],
"source": [
"class LDA():\n",
" def __init__(self):\n",
" pass\n",
" def fit(self,X,y):\n",
" print (np.unique(y).shape[0]==2)\n",
" if np.unique(y).shape[0]==2:\n",
" zero_idx = np.argwhere(y == 0)\n",
" one_idx = np.argwhere(y == 1)\n",
" x_0 = X[zero_idx].squeeze()\n",
" x_1 = X[one_idx].squeeze()\n",
" # x_0 = np.array([[4,2],[2,4],[2,3],[3,6],[4,4]])\n",
" # x_1 = np.array([[9,10],[6,8],[9,5],[8,7],[10,8]])\n",
" miu_0 = np.mean(x_0,axis=0)\n",
" # print (miu_0)\n",
" miu_1 = np.mean(x_1,axis=0)\n",
" # print (miu_1)\n",
" # print (x_0-miu_0)\n",
" cov_0 = (x_0-miu_0).T.dot((x_0-miu_0))\n",
"# print (cov_0)\n",
"# print ((x_0-miu_0).T.dot((x_0-miu_0)))\n",
" # cov_0 = np.cov((x_0-miu_0).T)\n",
" # print (cov_0)\n",
" # print (cov_0.shape)\n",
" cov_1 = (x_1-miu_1).T.dot((x_1-miu_1))\n",
" # cov_1 = np.cov((x_1-miu_1).T)\n",
" # print((miu_0-miu_1))\n",
" s_b = np.dot((miu_0-miu_1).reshape(-1,1),(miu_0-miu_1).reshape(1,-1))\n",
" # print (s_b)\n",
" s_w = cov_0+cov_1\n",
" # print (s_w)\n",
" s_w_inv = np.mat(s_w).I\n",
" # print (s_w_inv)\n",
" # s_w_inv = np.linalg.pinv(s_w)\n",
" s_x = np.dot(s_w_inv,s_b)\n",
" e_vals,e_vecs = np.linalg.eig(s_x)\n",
" # print (e_vals,e_vecs)\n",
" e_vals_max_idx = np.argmax(e_vals)\n",
"\n",
" return e_vals[e_vals_max_idx],e_vecs[:,e_vals_max_idx]\n",
" else:\n",
" y_unique = np.unique(y)\n",
" miu = np.zeros((y.shape[0],X.shape[1]))\n",
" for y_ in y_unique:\n",
" miu[y] = X[np.argwhere(y == y_)]\n",
" \n",
" "
]
},
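{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional sanity check (sketch): for two classes, the eigenvector returned by\n",
"# LDA.fit should point in the same direction as the closed-form Fisher solution\n",
"# w = S_w^{-1} (miu_0 - miu_1). A small hand-made two-class example is used.\n",
"x_0 = np.array([[4,2],[2,4],[2,3],[3,6],[4,4]])\n",
"x_1 = np.array([[9,10],[6,8],[9,5],[8,7],[10,8]])\n",
"X_toy = np.concatenate((x_0, x_1), axis=0)\n",
"y_toy = np.array([0]*len(x_0) + [1]*len(x_1))\n",
"_, w_eig = LDA().fit(X_toy, y_toy)\n",
"miu_0, miu_1 = x_0.mean(axis=0), x_1.mean(axis=0)\n",
"s_w = (x_0 - miu_0).T.dot(x_0 - miu_0) + (x_1 - miu_1).T.dot(x_1 - miu_1)\n",
"w_closed = np.linalg.solve(s_w, miu_0 - miu_1)\n",
"# Compare the two directions (up to sign and scale) via the cosine of their angle\n",
"w_eig = np.asarray(w_eig).ravel()\n",
"cos = abs(w_eig.dot(w_closed)) / (np.linalg.norm(w_eig) * np.linalg.norm(w_closed))\n",
"print(cos)  # should be close to 1.0"
]
},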
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"True\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\hsh\\Miniconda3\\envs\\venv\\lib\\site-packages\\sklearn\\discriminant_analysis.py:463: ChangedBehaviorWarning: n_components cannot be larger than min(n_features, n_classes - 1). Using min(n_features, n_classes - 1) = min(2, 2 - 1) = 1 components.\n",
" ChangedBehaviorWarning)\n",
"C:\\Users\\hsh\\Miniconda3\\envs\\venv\\lib\\site-packages\\sklearn\\discriminant_analysis.py:469: FutureWarning: In version 0.23, setting n_components > min(n_features, n_classes - 1) will raise a ValueError. You should set n_components to None (default), or a value smaller or equal to min(n_features, n_classes - 1).\n",
" warnings.warn(future_msg, FutureWarning)\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAX8AAAD8CAYAAACfF6SlAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8li6FKAAAgAElEQVR4nO3df3RcZbkv8O8z00lNWghkyu+aBK49eJVUfuRw8VY9eKP8aE+hsK4VnWquRYMCl3LuUSh3WKVwyLLWcxbUc+VHkEI5jGL0QksNyJGwVPSImoo2ReWCJQltkdJUAiWxSZPn/rFnkpnJ3vNz7z179v5+1spKZu+Zvd9ZSZ69532f93lFVUFERMESqnQDiIjIfQz+REQBxOBPRBRADP5ERAHE4E9EFEAM/kREAWRL8BeRzSKyX0R2pW1bLyJ7ReS3ya+ldpyLiIjKZ9ed/4MALjLZfoeqnpn8esKmcxERUZlsCf6q+lMAB+04FhEROW+Ow8e/VkQ+C6APwD+q6l+ynyAiHQA6AGDevHnnvPe973W4SURE/rJjx44DqnpcMa8Ru8o7iEgzgB+o6hnJxycAOABAAfwTgJNUdXWuY7S2tmpfX58t7SEiCgoR2aGqrcW8xrFsH1V9XVUnVXUKwH0AznXqXETVItGfQPOdzQjdGkLznc1I9Ccq3SQKKMe6fUTkJFV9LfnwMgC7cj2fyO8S/Ql0bO/A6MQoAGBwZBAd2zsAALGWWCWbRgFkV6rndwD8AsDpIrJHRK4EsFFE+kVkJ4CPAvgHO85FVI0S/Qm0P9Y+HfhTRidGEe+NV6hVFGS23Pmr6qdMNt9vx7GJql2iP4HV21ZjUidN9w+ODCLRn+DdP7nK6WwfosBK9CcQ741jcGQw73PZ/UNuY3kHIgek+vcLCfyA0f2z5sk1DreKaAbv/IlskLrLHxoZQmN9Iw6NH5rVv5/P8Ngwu3/INbzzJypT+l2+QjE4MojhseGSjsXBX3ILgz9RmeK98aLv8q0MjQzZchyifBj8icpkZ8BurG+07VhEuTD4E5XJroBdF6lDZ1unLcciyofBn6hMnW2dqIvU5X2eQFATrpm1DQCa6pvQtbyLg73kGmb7EJUpFbDbH2u3nMgFAArFUTVHTQ8GR2uj2HTxJgZ8qgje+ROVKL1IW7w3jo5zOvJ+AkjPAho7MuZ0E4ksMfgTlcAsvXPL77ag/QPtiNZGCzrG6MQoVj26CnKrYMHGBazwSa5i8KdAy1di2Wq/WXrn6MQonnjpCWy6eBMioUhR7RgeG8bqbat5ASDX2LaYix24mAu5KbvEMmBk3KQGXnPt/8yjn4Fi9v+OQNBY31hwWYdsTfVNGLh+oKTXUnCVspgLgz8FVvOdzaZBOhWAc+0HYLovLOGcg775CARTt0yV/HoKJk+t5EXkNdldOFZ356lJW1aTtwZHBrF00VLTwd1JnZxO3ywFJ3mRWxj8KRDMBmitgnRDbQOA3IE4NbgblvCsfWbdQYWoCddwkhe5hsGfAsFsgNYqSL89/jYS/Ymck7dGJ0Zxd9/dZXXxpIvWRrH50s3M+SfXcJIXBUIx9XfGJ8cR741PD7yuenSVQ60CIqEIHljxAIM+uY53/hQIxfalpy4WsZbY9ACvEyamJriIC1UEgz8FglkXTl2kznJCVvrFotDaPaVKLeJC5CYGfwqEWEsMXcu70FTfBIFMF1LbdPEm04tC+sBr+mudwgle5DYGfwq82jm10z9Ha6Om1TVjLTEMXD9QcOmGYqXGGYjcwgFfCoTs2bqDI4P43NbPQUQwPjk+/bxUsbXsNXk72zoRa4nh4NhBx9rIVbzITQz+FAhmqZ4TUxOznjc6MYo1T67B2JGxjAtFx/YOACirdEM+nOBFbmK3DwVCMXfVw2PDpkXb4r1xRydhcYIXuYnBnwLBjrvqoZEhxFpiCIkz/zbM9Sc3MfhTIJila0ZCkVnLKhaS/nnVOVfZ3r5y6gERlcKW4C8im0Vkv4jsStvWICI/EpGXkt+PteNcRKUwS/V8YMUD2Hzp5qLTP+9adpft7Su1HhBRqewa8H0QwP8B8FDatrUAelV1g4isTT6+0abzERXFKnsHsO5usXo+kR/YEvxV9aci0py1+VIA5yd/3gLgx2DwpwowS/NMZe9YBfRYSyxjX6oc9NDIkCOzfZ2aP0Bkxck+/xNU9TUASH4/3sFzEVmyWnKx0ElV2eWg35l4x9b21YRrsOniTbYekyifig/4ikiHiPSJSN8bb7xR6eaQD+ValKWQkgpmFw+7CARXnnUlu5TIdU4G/9dF5CQASH7fb/YkVe1S1VZVbT3uuOMcbA4FVa40z47tHZYXgFRXj1OTugBjoLdrRxfr+pDrnAz+jwNoT/7cDmCbg+cispRvURaz7p/0rh6nTepkzosQkRPsSvX8DoBfADhdRPaIyJUANgD4uIi8BODjycdErkuleVox6xYqp6unlJz9YsYgiOxgS/BX1U+p6kmqGlHVhap6v6oOq2qbqi5KfneuIhZRHrGWWEG1++3o6mmobcD8mvlFv46F3chNLOxGgZDoT+AvY38x3bd00dLp56SnhJZqeGy4pNexsBu5icGfAiHeG8cUpkz33d13N+7uu9vlFs3Gwm7kpoqnehK5wetdKvMi85juSa5i8KdA8HqXysTUBLN9yFUM/hQIXu9S4TKO5DYGfwqEXNk+XuH1rinyFwZ/CgyzUs1uiNZG8aXWL02Xjg5L2PR5Xu+aIn9h8KfASK/pD7i3gMrYkTEsaVyCgesHMHXLFLZctiXnegFEbmDwJ19LTdoK3RpC853NAICB6wegtygaahtcaUP27F2zhWW6lncx24dcxTx/8q2re67OyN8fHBnE6m2rARgBuNTJWKXI7s/PXi+AyG3ev/Pf2Q3ccQaw/hjj+87uSreIqkCiP2E6cWt8chyrHl01/SnALSEJMZWTPMXbd/47u4Ht1wETY8bjkVeNxwCweGXl2kWely9t0o1qnelSlTsB69XDiNzk7Tv/3ttmAn/KxJixnSgHt4N7NrOMHlbuJC/xdvAf2VPcdgq8RH8CCzYuqGgbmuqbMKXmdYSYy09e4e3gX7+wuO0UaKmqnG4O5GZLpWxa5ewzl5+8wtvBv20dEKnN3BapNbYTZXFyrd1ChCSE9g+0I9YSM109jLn85CXeHvBNDer23mZ09dQvNAI/B3vJRKW7VKZ0Cvc/fz+WNC6ZHtSN98YxNDKExvpGdLZ1crCXPENUtdJtmNba2qp9fX2VbgZVKacXWy9GU30Tgz25RkR2qGprMa/xdrcPURE62zoRCUUq3QwARrYRF2UnL2PwJ18RcadeTyGY2klexuBPvhHvjWN8crzSzchQ6XEIIiveHvAlKoLbgTa1PkCu1NKG2gYs2Lhg+jnR2ig2XbyJYwFUcbzzJ99wM4c+Eopg08WbcOCGA3j48odNxxrCEsabf30z4+IwPDaM1dtWcyyAKo7BvxwsOucpnW2drtXoTx9biPfGMTE1Mes5CsWkTs7aziUby
QsY/EuVKjo38ioAnSk6xwtAxcRaYvhi6xdtP67Z8o/pAdyqu8mqxEOu1xC5hcG/VCw650lLGpfYejyBWPbppwK41aIwVss1AizzQJXH4F8qFp3zpDVPrrH1eArNueZuoj+Btw6/NWtfTbgGHed0mI4F1IRrWOaBKo7Bv1QsOuc5if6EI0XdzPrtBYLBkUG0P9Zu2t9/VM1RuGvZXXhgxQMZ3UbR2ig2X7qZ2T5UcY6neorIAIC3AUwCOFLsFGTPaluXudAMwKJzFWb3XX8uCqMsitmFAQAOjh0EwOUaybvcyvP/qKoecOlc7mDROc+pZCnnbOzTJ6/jJK9yLF7JYO8RV/dcbduxBDJ9Z18Klm6mauBGn78C+HcR2SEiHdk7RaRDRPpEpO+NN95woTnkR107umw7VimBPyxhCARN9U3oWt7Frh7yPDeC/xJVPRvAxQCuEZGPpO9U1S5VbVXV1uOOO86F5pAfWfW9u6EuUoctl23B1C1TGLh+wAj8iQTQ3AyEQsb3BGf0krc4HvxVdV/y+34AjwE41+lzUvDkyql36nyWd/qJBNDRAQwOAqrG944OXgDIUxwN/iIyT0SOSv0M4AIAu5w8JwXT6dHTXT3fpE6isb4RQyNDiPfGM2v1xOPAaNZykqOjxnYij3B6wPcEAI8l66DMAfBtVf2hw+ekAPrj8B9dPV8qzx+YWbgFMFI7MWRRusFqO1EFOHrnr6q7VfUDya/3qypTIMgRuero2M0sGyhj4ZZGizRPq+1EFcAZvkQFmF8zf7qP3yobaLpYW2cnUFeXubOuzthO5BEM/uQL8yLzyj5GU32T5b5obXQ6m8esyieQVuAtFgO6uoCmJkDE+N7VZWwn8ggGf/KFe5ffW1bGz/ya+TnLLBddgjkWAwYGgKkp4zsDP3kMgz/5QqwlhvObzy/ptXNCc3DP39+TsyRD+r5U3Z5sVtuJvIjBn3zjxwM/Lvo1YQnjwRUPItYSQ2dbZ0ElmK0uEqznQ9WEwZ9y6tndgwu+fwEWb1mMC75/AXp291S6SZaKneWbmpmbmqAVa4kVVIK5s60TdZG6WcfKW8+Hs37JQ0S19AJWdmttbdW+vr5KN4OSenb3YP1/rMdfJ/+asf2Yucdg7blrsey0ZRVqmbnwbeGCUz4FgobaBhwcO4jG+kZ0tnUWVY8n0Z9AvDeOoZGhwl6fmvWbPvmrro4DwWQLEdlRbLl8Bn+ydMH3L8Br77yW8zleuRAk+hP47KOfxRTyB/9IKAIRwfjk+PS2ukidswXZmpuNMg/ZwmFjULix0UgF5YWASsDgT7ZavGVx0RUuzzvxPNx34X0Otcha853N0zNus6W6cVJ3+cNjwzg0fmjW85rqmzBw/YAzDQyFjDo/ufCTAJWolODPPn+ydOK8E4t+zXN/fg4tW1rQsqUFH37kw66NEVilYgoEB244gAM3HMDULVPobOs0Dfy5jmGLQmb3sv4PuYjBnyytOXsN3hV+V8mvf/Pwm1j77Nrpi8EXnvqCja3LZJVp01DbgOY7mxG6NYTmO5tzLvU4PUnLLqkBXpHC6/qw/g+5hMGfLC07bRnW/9f1qK+pt+V46Z8K2r7bZssxU5YuWmq6fXhsGIMjg1AoBkcGcy71+Pb425nVOcuRXtYZyN/lk8L6P+QS9vlTQXp292DDrzbgzcNvOnL8/vb+sl6fq8+/GLb1+1sN8OZSUwNs3sw+fyoaB3zJcT27e/DVX34VI+Mjjp2jlAtB6NZQWevupggEU7fYUCG0kAHebNEocOBA+eemwOGALzlu2WnL8LNP/Qz97f3Y8OENqAnV2H6OVNdQ60OF/y0XM7s2Whu1rAMUktD0+EDJXUCJhBH8i3WQ5SHIPbzzJ1s40i2U9rd5/Jyj0LvqF5ZPTfQn0LG9A6MTo5bPAWby+QHkfX5Juf9mk7kK1dRkFIEjKhK7fcgT2r7bhv1/3W/vQVWNrJkks66hRH8Ca55cM2tQN7X4SlN9U8ZM3PRZuiEJmZaHKHoMoJS+fgCIRIAHHmB/P5WEwZ88pWd3D2569iZb+uKtzJW56Pts5t9M0aUXYD1mUPQYQCl9/QAHe6ksDP7kWT27e7D22bWOnqOc2cVW2UKu3fkD7PahknHAlzxr2WnL0N/ePz1Q7ITUPIKzHzob7/nme4oauC25Ume6RAI4ZD57uCCc4EUuYvAn16VfCD55+idtP/6ETuBd896F9zW9D/OPmY+bfnJj3gtArCWGruVdaKpvml6rt6jB3tRA77D1JLK8GmyeYUyUA7t9yDNWPLYCf3rrT7YfN/U3fsV7r8DN591c/AESCaPmztCQdfXNcrp7UubMAR58kP3+VDT2+ZNvfOGpL+C5Pz/n2PHr5tRh3QfX5S9FXWgd/lIHerOx359KwOBPvtWypcWR4+YdJLa6o88O0nbc+QNGOuuUDTOMKVA44Eu+5dQYwXN/fg5nffOLOHVtD5ZseAZbn9+b+QSrQdjs7Z2dxieCcrGwG7mEwZ+qys3n3Wz7hWBi3n9AAex9cww3PdqfeQGwCsaNjZlr8sbjQHu78YmgVDU1xkWEyAXs9iFfuP2529H9YjcUOj3AK2kzgnNRBQ79cSb99Ni6CFSBN8cmACiOHXsbt/zoXqz4w0+MJ9TVGYF+yxbrsYAFC4rP/GlrA55+urjXEMGjff4ichGATQDCAL6lqpZJ3gz+VI70+j718+pxcvRkhCSU9yKgKjj0x6/mPnja/8mxcxS3PLsFK579v7OflxoLyDUAHI1aXxgefpjZPlQ0zwV/EQkD+H8APg5gD4BfA/iUqv7e7PkM/lSOXLN0P3/e5/HdF787a58qMPGX83D49RXFnSzt/2bVb36A25++13iQGrDNNVA8NJT7wsCyzlQkLw74ngvgZVXdrarjAB4BcKnD56SAslqDd2hkaHqs4JOnfxIhMf7sBSHoyAeLD/yAEeSTXw+f/fdovmE7mr+8FVs/dLmx32oA+NCh3JO5ypkkRlQEp+/8/zuAi1T188nHnwHwX1T12rTndADoAIDGxsZzBu1Il6NAKqU+z9bn9+LrT72IfW+O4eRjavHO4SPJvv4Spf0/LdmzC4lttwPvvJP5nEgEmMhxDg+Nw1F1KOXOf45TjUky62zN+MtW1S4AXYDR7eNwe8jHOts6Z9Xoz1efZ8VZp2DFWadMP976/F585Xu/w8RUiX+KaeMLP194BpqveQQAsGj/K/jRg9cZO3IF/mi0tPMSFcnp4L8HwLvTHi8EsM/hc1JAperwFFvOOV3qQrD+8RfK+wQAZFwIXjr+VDTfsB0AsOSV3yDxvVtmPz8SATZtKu+cRAVyuttnDowB3zYAe2EM+H5aVV8wez4HfMlrbt7aj4efs6fa5iWhn+GGOd04WQ5gny7AxiMrcfSv/2IMFofDRuooM32oBJ4b8FXVIwCuBfAUgD8A6LYK/ERedPuKFgxsWIZFx88z+uLTv4pwSehn2BD5FhaGDiAkwMLQAWyIfAtv/e2xxmDxP27FzXve5dC7IJqNk7yIrKRX82xoyMjEWfw/E3ir9mjjQQGTyX5W
cx0WhmancO6ZWoAPjX/DeKCaHCUTrDqvEbevcKaeEfmP5/L8i8XgT55R4ELsN3/sKjx81rKZC4DFhWD33E8jZLJrSgWnHc691gAvBJSP57p9iDwnvR5Pc7Px2Ew8njfwA8DtT9+Lga9fgoGNy7HqNz+w7BbapwtMX79P82f3PPzcEJrX9qB5bQ9i9/0i7/OJCsE7fwqOQmvzA2XX59/6n/8O1y//8vTjS8I/x4bIt1An49PbRrUGayc+j8enPlT08U84qga/jH+85PaRv7DbhyiXQmvz53puCT7+P76Bl44/FZeEf57M9hnGPo1i45GVJQX+dOwSIoDBnyi3XHfzqZo7qWUagdmfEiIR4OijgYMHZ0o0FFmOofnLW4FQeGZDgZVHraSnj+6X4/Dq2V/B315yVVnHpOrDPn+iXKxq84sYd/mqxveODmN7V5dxURAxvj/wgFF0bWrK+H7gQNEzcgf+eQUGNi7HwMblWLT/lZJTR4HZ6aMn4g28f8fN6N78L0Ufi4KHd/4UHGZ9/iLmgbfQtXRLqdtvYjp1tIhPArnSR/su+2lG2QryN975E+USi82+m7e6+RkczMwMWrDA+MrOEjp40Jam7fzXmJE1tGEZTjiqpqDXnCzmpZ9PlmF8/akXsfX5vViy4RnrJSop0Jyu7UPkHemTthobgaVLgXvusb4ArF4NjCezc9Lv7tO7hhobbRsYTnVLpWfxbH1+L778vd/hiEmhuX26AAtNLgD7NDq9JOXYxCSAmSUqAfATAQFgtw8FRTFdPoVqajIuIHffXX77rFJO02x9fi/+96M7MToxBWCmz98sfbRHP4xJk/d2yjG1+Pna/wbs7AaevBEYS35yqW0ALv4asHhl+e+FXMdsHyIrNqZuZpg/31igpRyhEPDQQ0UVdbt5az++88tXsUyenZU++qPw303f8WcTAK98+h1g2zXA5HjmzlAEWHEXLwBViMGfyEqZk7YcVVMDbN5cckXP7AVpvnLh6fj6Uy9i75tjs557yjG1+Pnc64CRV80PVv9u4B92ldQOqhwvLuZC5A1WffPldv3YYXzcGIsoMfhnL0iTkt7nDwC1kTC+cuHpwLY91gcbybGPfIXZPhQMZmvqRiLm6+xWgs1dUivOOgVfvbwFpxxTC4Fxx//Vy1uMi0T9QusXmu3b2Q187VRgfb3x9bVTjW1U1XjnT8GQuqtOL9H89tuz19etlHA4/3OKZPWJAG3rrPv829ZlbtvZPfu5YweBrVcbP3N8oGrxzp/8x6pyZyxmTNyamjIGasfHcxzEZZPmA7SOWLwSuPSbRoZPSm2D+WBv722zLxIAMDVh7KOqxTt/8pfslM70nPz0PvUhe5ZmtI0Dd/45LV5Z2F17rjGA9H07u42Lwcgeo+uobR0/FXgc7/zJX8zq8I+OAmvWZG6zqvNTKW7e+RejkPGBnd3A9lQGkRrft1/HcQGPY/Anf7G6ox8ezly4xWwA2En57uybmtxpR7Ha1gFhk3IT6eMDvbcBE1lppRNj7BbyOAZ/8peGBut98fjMz6k6P0VW5SzZ5GTui8173uNOO4pVyPiAVdcQ00Y9jcGfgiP7U0GJefUliUaNi43VJ4BnnrFeUrLSFq8EbnwFWD9ifN34SmZ/vlXXUK4uI6o4Bn/yl1xVNrP7+RMJW8oxFywWMzKNzKhmfjKpJm3rgEht5rZI7ey0UfIUBn/yl1wLtqRW6EpxM9gePGhcbEI5/uW8loFUqMUrgeXfMEpDQIzvy7/BbB+PY20f8hez6p0AMHcucPiw8XM0CmzaBKxaZf/5rcpFiBgzinPNLSh0ARmiLFzMhSh9wRZgZmWsVOAHjK6e9nZnzq9qvhqXau7AX1c3+5MJkYMY/Ml/YrGZVE6rT7ZO5tWrFjdpq6kpby1/Irtxhi/5k9lkL7eEw4VfXNjVQxXi2J2/iKwXkb0i8tvk11KnzkU0ixMLtwCFLbBeaOBnVw9VkNN3/neo6j87fA6iTIlE/jr9hQzAmik3QSIUMtI9w2Fj3IFdPVQh7PMn/4nH8wfpcBi48kqj26WQu/n015Ujlec/OQls2eLdiV1+s7MbuOMMYP0xxnfWHXI8+F8rIjtFZLOIHGv2BBHpEJE+Eel74403HG4OBUIh+fJHjgBPPGH0t//bvxV+bLMunWIuHulGR6t3Ylc1MSs89+gXAr8oTVl5/iLyNIATTXbFATwH4AAABfBPAE5S1dW5jsc8f7JFoYu1ixiB/3OfAyYmSjtXMYO7Vm2wmvVL9rjjDOs1iyO1vpiQ5nqev6p+TFXPMPnapqqvq+qkqk4BuA/AueWci6hghVbsbGw07rxLDfyAEfjnz8/9HBHr53ittLQf5SowF+Dqo05m+5yU9vAyALucOhdRhljMGEzN1x2zdKk9JRUOHbLe19RkfLq4557ZFyRm+7gjX4G5gFYfdTLbZ6OInAmj22cAwFUOnotoRiJhDKbm69K89978WUHlys7hT60h3NhoBH5m+zivbZ3R55+95kBKQKuPsrYP+U+hff5u8ND/V6Dt7AaevNFYfD5dgPv8OcOX/CWR8E7gd2uhGMovtWaxHWsNZ19IahuAi79WdRcQBn/yj1RFTy+oqTEqh5K3FLpwvZWd3cDWq4GptCSBsYPAtmtmjl8lOMmL/KOS9XyAmQljTU3A5s3sz/ej3tsyA3/K5HjVZQ3xzp/8o5KLobBAWzDkygyqsqwh3vmTP+RbJctJkYiR7hkKGYPNLNngX7kyg6osa4jBn6pfqq/fyRr92VJzCKJR4+fhYSOzZ3DQaAsvAP7Utg4IRWZvD9dU3ZrFDP5U/dzu609N3FI1Zu5mVwa1qtmTSBifDPgJoXotXgmsuMvI8EmpbQAu/WZVDfYCzPMnPwiFrNfNBezNtc/u28917vSaPWZrC9fVcQUvsgXX8KVgsqqP09hof+2c7HIMuc6dzuzTCat6Ui4Ol6Fm8KfqZ1bILVU3p9Aib4WIRmffpec6dzqrTKRKZiiRd5mVod5+na0XAAZ/qn6xmNF9kp5n395u3FV/5jNAbS0wb15556irM5+0ZXZus66cQj8hEAHGnIHsWkQTY8bMYps+DTD4kz/EYkZf/NSUcdd9//1G5o2qkYnzzjv5j2GVKhoOzw7o6YO38bhxzqkpow1mffiFfkIgAqznDIwdtO3TAIM/+c+aNcWvzQuYL6pSV2dUCM0O/B0dMxeXQtI7C/2EQAQUPmegjPUImO1D/lPqsorZolGjqyc7QFtVDeUsX7JLqs/fqgx1BoHcOsJsHyLbzJ9vfmfOwVty2uKVRqnp+ncDEON7+tyCdCXOLGZtH/KfefMK6+PPxyqYNzaa3/lz8JbslF2B1OzTQKQ2ObP4k0Ufnnf+5B+JBLBggT2BH7AO5hy8pUow+zRQxkI0vPMnf0gkgNWrSxvotXLokHHc7K6f1GMuyUhuK3c9gjQc8CV/WLDASOm0G0swUBVgeQcKLicCP8ASDORbDP5E+TCLh3yIwZ+qn12lkefPN9+uyhLM5DsM/lT97OiWiUaBe+6xLgLHRVrIZxj8qfqV2y1
TUzMzkzdVgsEM+//JRxj8qfpZ5eNbVfKcO3dm+cWmJmDz5plsnlSBOKsSEez/J59g8KfqZzbpSgQ47zzzSp2qxp1+riqcLMFMPsfgT9UvFjPq96ffrasCzzxjXqlzfDx/9w1n8ZLPlRX8ReQTIvKCiEyJSGvWvptE5GUReVFELiyvmUR5PPHE7LV0c01gzNd9wxLM5HPllnfYBeByAPembxSR9wG4AsD7AZwM4GkR+RtVnSzzfETmiu2LL6T7JhZjsCffKuvOX1X/oKovmuy6FMAjqnpYVV8B8DKAc8s5F1FOxfTF19Sw+4YCz6k+/1MAvJr2eE9yG5EzCg3m0Whmdg9RQOXt9hGRpwGcaLIrrqrbrF5mss20A1ZEOgB0AEAjMymoVLEYsGqV9X6uskWUIW/wV9WPlXDcPQDenfZ4IYB9FsfvAtAFGFU9SzgXkSEatS7wxm4eogxOdfs8Dgop+r8AAAV9SURBVOAKEZkrIqcCWATgVw6di8iwaRMQicze/qUvsZuHKEu5qZ6XicgeAB8E0CMiTwGAqr4AoBvA7wH8EMA1zPQhVxx99Oxt3d2syUOUhYu5kD8kEkbhtdFR8/01NRzoJd/iYi4UXPG4deAHCpvVSxQgDP7kD4VM8mJRNqJpDP7kD4WkCTOVmGgagz/5Q2eneaZPCmf1EmVg8Cd/iMXMM30Ao6wzB3uJMjD4k38cPGi+XZWBnygLgz/5BxdgISoYgz/5BxdgISoYgz/5BxdgISpYuYu5EHkLF2AhKgjv/ImIAojBn4gogBj8iYgCiMGfiCiAGPyJiAKIwZ+IKIAY/ImIAojBn4gogBj8iYgCiMGfiCiAGPyJiAKIwZ+IKIAY/ImIAojBn4gogBj8iYgCiMGfiCiAGPyJiAKIwZ+IKIDKCv4i8gkReUFEpkSkNW17s4iMichvk1/3lN9UIiKyS7lr+O4CcDmAe032/UlVzyzz+ERE5ICygr+q/gEARMSe1hARkSvKvfPP5VQReR7AWwBuVtVnzZ4kIh0AOpIPD4vILgfbVGkLAByodCMcxPdX3fz8/vz83gDg9GJfkDf4i8jTAE402RVX1W0WL3sNQKOqDovIOQC2isj7VfWt7CeqaheAruS5+lS1Nfs5fsH3V934/qqXn98bYLy/Yl+TN/ir6seKPaiqHgZwOPnzDhH5E4C/AVB0A4mIyH6OpHqKyHEiEk7+fBqARQB2O3EuIiIqXrmpnpeJyB4AHwTQIyJPJXd9BMBOEfkdgO8D+KKqHizgkF3ltKcK8P1VN76/6uXn9waU8P5EVZ1oCBEReRhn+BIRBRCDPxFRAHki+Pu9TITV+0vuu0lEXhaRF0Xkwkq10S4isl5E9qb9zpZWuk3lEpGLkr+fl0VkbaXbYzcRGRCR/uTvq+oz8kRks4jsT58zJCINIvIjEXkp+f3YSraxHBbvr+j/O08Ef8yUifipyb4/qeqZya8vutwuu5i+PxF5H4ArALwfwEUA7kplSVW5O9J+Z09UujHlSP4+vgngYgDvA/Cp5O/Nbz6a/H35IRf+QRj/T+nWAuhV1UUAepOPq9WDmP3+gCL/7zwR/FX1D6r6YqXb4ZQc7+9SAI+o6mFVfQXAywDOdbd1lMe5AF5W1d2qOg7gERi/N/IoVf0pgOzswksBbEn+vAXAClcbZSOL91c0TwT/PE4VkedF5Cci8uFKN8ZmpwB4Ne3xnuS2anetiOxMfjyt2o/XSX79HaVTAP8uIjuS5Vb86ARVfQ0Akt+Pr3B7nFDU/51rwV9EnhaRXSZfue6iUmUizgLwvwB8W0SOdqfFxSnx/ZlVxPN87m2e93o3gP8E4EwYv79/qWhjy1eVv6MiLVHVs2F0bV0jIh+pdIOoaEX/3zlZ2C2D38tElPL+YNxFvjvt8UIA++xpkXMKfa8ich+AHzjcHKdV5e+oGKq6L/l9v4g8BqOry2z8rZq9LiInqeprInISgP2VbpCdVPX11M+F/t95utsnAGUiHgdwhYjMFZFTYby/X1W4TWVJ/mOlXAZjsLua/RrAIhE5VURqYAzQP17hNtlGROaJyFGpnwFcgOr/nZl5HEB78ud2AFZFKatSKf93rt355yIilwH4VwDHwSgT8VtVvRBGmYjbROQIgEkUXibCU6zen6q+ICLdAH4P4AiAa1R1spJttcFGETkTRtfIAICrKtuc8qjqERG5FsBTAMIANqvqCxVulp1OAPCYGGtyzAHwbVX9YWWbVB4R+Q6A8wEsSJafuQXABgDdInIlgCEAn6hcC8tj8f7OL/b/juUdiIgCyNPdPkRE5AwGfyKiAGLwJyIKIAZ/IqIAYvAnIgogBn8iogBi8CciCqD/D0mp5flDBkurAAAAAElFTkSuQmCC\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Generate 2-D Toy data\n",
"x1 = np.linspace(-2,2,500)\n",
"elps = np.random.normal(0,2,500)+np.random.choice([-6,6],500)\n",
"x2 = 2*x1 + 2 +elps\n",
"X = np.concatenate((x1.reshape(-1,1),x2.reshape(-1,1)), axis= 1)\n",
"# print (X.shape)\n",
"y = []\n",
"\n",
"for i in range(X.shape[0]):\n",
" if 2*X[i][0]-X[i][1]+2 >0:\n",
" y.append(0)\n",
" else:\n",
" y.append(1)\n",
"y = np.array(y)\n",
"\n",
"\n",
"\n",
"lda = LDA()\n",
"val,eig = lda.fit(X,y)\n",
"from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n",
"clf = LinearDiscriminantAnalysis(n_components=2,solver=\"eigen\")\n",
"clf.fit(X, y)\n",
"color = {0: \"red\",1:\"green\"}\n",
"X2= np.dot(X,clf.coef_.T)*clf.coef_\n",
"X1 = np.dot(X,eig)*eig.T\n",
"for y_i in np.unique(y):\n",
" idx = np.where(y == y_i)\n",
" plt.scatter([X[idx,0]],[X[idx,1]],c = color[y_i])\n",
" plt.scatter([X1[idx,0]],[X1[idx,1]])\n",
" plt.scatter([X2[idx,0]],[X2[idx,1]])\n",
"plt.xlim([-15,15])\n",
"plt.ylim([-15,15])\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 0, 0],\n",
" [ 2, 2],\n",
" [ 4, 4],\n",
" [ 6, 6],\n",
" [ 8, 8],\n",
" [ 9, 9],\n",
" [10, 10],\n",
" [12, 12],\n",
" [13, 13]])"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": []
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"ename": "IndexError",
"evalue": "index 10 is out of bounds for axis 0 with size 9",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mIndexError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-18-337be8e4e336>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0my\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0midx\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msqueeze\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[1;31mIndexError\u001b[0m: index 10 is out of bounds for axis 0 with size 9"
]
}
],
"source": []
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "operands could not be broadcast together with shapes (3,2) (3,) ",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-12-176ba53b9a04>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 5\u001b[1;33m \u001b[0mx\u001b[0m\u001b[1;33m-\u001b[0m\u001b[0my\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[1;31mValueError\u001b[0m: operands could not be broadcast together with shapes (3,2) (3,) "
]
}
],
"source": [
"x = np.array([[1,2,4],[1,2,3]])\n",
"y = np.array([3,4,5])\n",
"x-y"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 9, 15],\n",
" [15, 25]])"
]
},
"execution_count": 59,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y = np.array([3,5])\n",
"np.dot(y.reshape(-1,1),y.reshape(1,-1))\n",
"# y.reshape(1,-1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "venv",
"language": "python",
"name": "venv"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.2"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
"""
==============================================================
Face recognition based on Kernel LDA + KNN
Kernel Discriminant Analysis is used for dimensionality reduction
K-Nearest-Neighbor is used for classification
Data:
The face images come from the Olivetti faces data-set from AT&T (classification)
The data set contains face images of 40 people, 10 images per person
We only use the images of the first 2 people, whose labels (label/target) are 0 and 1
Algorithm:
Implement Kernel Discriminant Analysis based on an RBF Kernel yourself, for dimensionality reduction of two-class data
The code skeleton is already given; students fill in the fit() and transform() functions of KernelDiscriminantAnalysis
==============================================================
"""
# License: BSD 3 clause
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_olivetti_faces
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import rbf_kernel
from scipy.sparse.linalg import eigsh
print(__doc__)
################################################
"""
Scikit-learn-compatible Kernel Discriminant Analysis.
"""
import numpy as np
from scipy import linalg
from sklearn.base import BaseEstimator, ClassifierMixin, TransformerMixin
from sklearn.preprocessing import OneHotEncoder
from sklearn.utils.validation import check_array, check_is_fitted, check_X_y
class KernelDiscriminantAnalysis(BaseEstimator, ClassifierMixin,
TransformerMixin):
"""Kernel Discriminant Analysis.
Parameters
----------
n_components: integer.
The dimension after transform.
gamma: float.
Parameter to RBF Kernel
lmb: float (>= 0.0), default=0.001.
Regularization parameter
"""
def __init__(self, n_components, gamma, lmb=0.001):
self.n_components = n_components
self.gamma = gamma
self.lmb = lmb
self.X = None # 用于存放输入的训练数据的 X
self.K = None # 用于存放训练数据 X 产生的 Kernel Matrix
self.M = None # 用于存放 Kernel LDA 最优化公式中的 M
self.N = None # 用于存放 Kernel LDA 最优化公式中的 N
self.EigenVectors = None # 用于存放 Kernel LDA 最优化公式中的 M 对应的广义特征向量, 每一列为一个特征向量, 按照对应特征值大小排序
def fit(self, X, y):
self.X = X
if np.unique(y).shape[0] == 2:
x_1 = X[y == 0]
x_2 = X[y == 1]
M1 = np.expand_dims(np.sum(rbf_kernel(X, x_1,gamma=self.gamma), axis=-1) / x_1.shape[0], axis=-1)
M2 = np.expand_dims(np.sum(rbf_kernel(X, x_2,gamma=self.gamma), axis=-1) / x_2.shape[0], axis=-1)
M = ((M1 - M2).dot((M1 - M2).T))
self.M =M
N = np.zeros((X.shape[0], X.shape[0]))
K1 = rbf_kernel(X, x_1,gamma=self.gamma)
N1 = (K1.dot(np.diag(np.ones(x_1.shape[0])) - 1 / x_1.shape[0])).dot(K1.T)
K2 = rbf_kernel(X, x_2,gamma=self.gamma)
N2 = (K2.dot(np.diag(np.ones(x_2.shape[0])) - 1 / x_2.shape[0])).dot(K2.T)
N = N1 + N2
N += np.diag(np.ones(N.shape[0]) * self.lmb)
self.N = N
# N_inv = np.linalg.inv(N)
# tmp = N_inv.dot(M)
# e_vals, e_vecs = np.linalg.eig(tmp)
# e_vals_max_idx = np.flip(np.argsort(e_vals.real))
e_vals, e_vecs = eigsh(M, self.n_components, N, which='LM')
self.EigenVectors = e_vecs
else:
M_total = np.expand_dims(np.sum(rbf_kernel(X, X,gamma=self.gamma), axis=-1) / X.shape[0], axis=-1)
M = np.zeros((X.shape[0], X.shape[0]))
N = np.zeros((X.shape[0], X.shape[0]))
y_unique = np.unique(y)
for y_ in y_unique:
x = X[y == y_]
Mi = np.expand_dims(np.sum(rbf_kernel(X, x,gamma=self.gamma), axis=-1) / x.shape[0], axis=-1)
M += ((Mi - M_total).dot((Mi - M_total).T))
Ki = rbf_kernel(X, x,gamma=self.gamma)
N += (Ki.dot(np.diag(np.ones(x.shape[0])) - 1 / x.shape[0])).dot(Ki.T)
N += np.diag(np.ones(N.shape[0]) * self.lmb)
self.N = N
self.M = M
e_vals, e_vecs = eigsh(M, self.n_components, N, which='LM')
self.EigenVectors = e_vecs
"""Fit KDA model.
Parameters
----------
X: numpy array of shape [n_samples, n_features]
Training set.
y: numpy array of shape [n_samples]
Target values. Only works for 2 classes with label/target 0 and 1.
Returns
-------
self
"""
def transform(self, X_test):
"""Transform data with the trained KernelLDA model.
Parameters
----------
X_test: numpy array of shape [n_samples, n_features]
The input data.
Returns
-------
y_pred: array-like, shape (n_samples, n_components)
Transformations for X.
"""
return rbf_kernel(X_test,self.X) @ self.EigenVectors
################################################
# Number of nearest neighbors used in KNN (the value of k)
n_neighbors = 3
# Fix the random seed so that the experiment is reproducible
random_state = 0
# Load the Olivetti faces data set
faces = fetch_olivetti_faces()
targets = faces.target
# show sample images
images = faces.images[targets < 2] # save images
features = faces.data # features
targets = faces.target # targets
fig = plt.figure() # create a new figure window
for i in range(20): # display 20 images
    # subplot : 4 rows and 5 columns
    img_grid = fig.add_subplot(4, 5, i+1)
    # plot features as image
    img_grid.imshow(images[i], cmap='gray')
plt.show()
# Prepare data: only the faces with labels 0 and 1 are used
X, y = faces.data[targets < 2], faces.target[targets < 2]
# Split into train/test
X_train, X_test, y_train, y_test = \
train_test_split(X, y, test_size=0.33, stratify=y,
random_state=random_state)
# Reduce dimension to 2 with KernelDiscriminantAnalysis
# can adjust the value of 'gamma' as needed.
kda = make_pipeline(StandardScaler(),
KernelDiscriminantAnalysis(n_components=2, gamma = 0.000005))
# Use a nearest neighbor classifier to evaluate the methods
knn = KNeighborsClassifier(n_neighbors=n_neighbors)
plt.figure()
# plt.subplot(1, 3, i + 1, aspect=1)
# Fit the method's model
kda.fit(X_train, y_train)
# Fit a nearest neighbor classifier on the embedded training set
knn.fit(kda.transform(X_train), y_train)
# Compute the nearest neighbor accuracy on the embedded test set
acc_knn = knn.score(kda.transform(X_test), y_test)
# Embed the data set in 2 dimensions using the fitted model
X_embedded = kda.transform(X)
# Plot the projected points and show the evaluation score
plt.scatter(X_embedded[:, 0], X_embedded[:, 1], c=y, s=30, cmap='Set1')
plt.title("{}, KNN (k={})\nTest accuracy = {:.2f}".format('kda',
n_neighbors,
acc_knn))
plt.show()
"Number","Sepal.Length","Sepal.Width","Petal.Length","Petal.Width","Species"
"1",5.1,3.5,1.4,0.2,"setosa"
"2",4.9,3,1.4,0.2,"setosa"
"3",4.7,3.2,1.3,0.2,"setosa"
"4",4.6,3.1,1.5,0.2,"setosa"
"5",5,3.6,1.4,0.2,"setosa"
"6",5.4,3.9,1.7,0.4,"setosa"
"7",4.6,3.4,1.4,0.3,"setosa"
"8",5,3.4,1.5,0.2,"setosa"
"9",4.4,2.9,1.4,0.2,"setosa"
"10",4.9,3.1,1.5,0.1,"setosa"
"11",5.4,3.7,1.5,0.2,"setosa"
"12",4.8,3.4,1.6,0.2,"setosa"
"13",4.8,3,1.4,0.1,"setosa"
"14",4.3,3,1.1,0.1,"setosa"
"15",5.8,4,1.2,0.2,"setosa"
"16",5.7,4.4,1.5,0.4,"setosa"
"17",5.4,3.9,1.3,0.4,"setosa"
"18",5.1,3.5,1.4,0.3,"setosa"
"19",5.7,3.8,1.7,0.3,"setosa"
"20",5.1,3.8,1.5,0.3,"setosa"
"21",5.4,3.4,1.7,0.2,"setosa"
"22",5.1,3.7,1.5,0.4,"setosa"
"23",4.6,3.6,1,0.2,"setosa"
"24",5.1,3.3,1.7,0.5,"setosa"
"25",4.8,3.4,1.9,0.2,"setosa"
"26",5,3,1.6,0.2,"setosa"
"27",5,3.4,1.6,0.4,"setosa"
"28",5.2,3.5,1.5,0.2,"setosa"
"29",5.2,3.4,1.4,0.2,"setosa"
"30",4.7,3.2,1.6,0.2,"setosa"
"31",4.8,3.1,1.6,0.2,"setosa"
"32",5.4,3.4,1.5,0.4,"setosa"
"33",5.2,4.1,1.5,0.1,"setosa"
"34",5.5,4.2,1.4,0.2,"setosa"
"35",4.9,3.1,1.5,0.2,"setosa"
"36",5,3.2,1.2,0.2,"setosa"
"37",5.5,3.5,1.3,0.2,"setosa"
"38",4.9,3.6,1.4,0.1,"setosa"
"39",4.4,3,1.3,0.2,"setosa"
"40",5.1,3.4,1.5,0.2,"setosa"
"41",5,3.5,1.3,0.3,"setosa"
"42",4.5,2.3,1.3,0.3,"setosa"
"43",4.4,3.2,1.3,0.2,"setosa"
"44",5,3.5,1.6,0.6,"setosa"
"45",5.1,3.8,1.9,0.4,"setosa"
"46",4.8,3,1.4,0.3,"setosa"
"47",5.1,3.8,1.6,0.2,"setosa"
"48",4.6,3.2,1.4,0.2,"setosa"
"49",5.3,3.7,1.5,0.2,"setosa"
"50",5,3.3,1.4,0.2,"setosa"
"51",7,3.2,4.7,1.4,"versicolor"
"52",6.4,3.2,4.5,1.5,"versicolor"
"53",6.9,3.1,4.9,1.5,"versicolor"
"54",5.5,2.3,4,1.3,"versicolor"
"55",6.5,2.8,4.6,1.5,"versicolor"
"56",5.7,2.8,4.5,1.3,"versicolor"
"57",6.3,3.3,4.7,1.6,"versicolor"
"58",4.9,2.4,3.3,1,"versicolor"
"59",6.6,2.9,4.6,1.3,"versicolor"
"60",5.2,2.7,3.9,1.4,"versicolor"
"61",5,2,3.5,1,"versicolor"
"62",5.9,3,4.2,1.5,"versicolor"
"63",6,2.2,4,1,"versicolor"
"64",6.1,2.9,4.7,1.4,"versicolor"
"65",5.6,2.9,3.6,1.3,"versicolor"
"66",6.7,3.1,4.4,1.4,"versicolor"
"67",5.6,3,4.5,1.5,"versicolor"
"68",5.8,2.7,4.1,1,"versicolor"
"69",6.2,2.2,4.5,1.5,"versicolor"
"70",5.6,2.5,3.9,1.1,"versicolor"
"71",5.9,3.2,4.8,1.8,"versicolor"
"72",6.1,2.8,4,1.3,"versicolor"
"73",6.3,2.5,4.9,1.5,"versicolor"
"74",6.1,2.8,4.7,1.2,"versicolor"
"75",6.4,2.9,4.3,1.3,"versicolor"
"76",6.6,3,4.4,1.4,"versicolor"
"77",6.8,2.8,4.8,1.4,"versicolor"
"78",6.7,3,5,1.7,"versicolor"
"79",6,2.9,4.5,1.5,"versicolor"
"80",5.7,2.6,3.5,1,"versicolor"
"81",5.5,2.4,3.8,1.1,"versicolor"
"82",5.5,2.4,3.7,1,"versicolor"
"83",5.8,2.7,3.9,1.2,"versicolor"
"84",6,2.7,5.1,1.6,"versicolor"
"85",5.4,3,4.5,1.5,"versicolor"
"86",6,3.4,4.5,1.6,"versicolor"
"87",6.7,3.1,4.7,1.5,"versicolor"
"88",6.3,2.3,4.4,1.3,"versicolor"
"89",5.6,3,4.1,1.3,"versicolor"
"90",5.5,2.5,4,1.3,"versicolor"
"91",5.5,2.6,4.4,1.2,"versicolor"
"92",6.1,3,4.6,1.4,"versicolor"
"93",5.8,2.6,4,1.2,"versicolor"
"94",5,2.3,3.3,1,"versicolor"
"95",5.6,2.7,4.2,1.3,"versicolor"
"96",5.7,3,4.2,1.2,"versicolor"
"97",5.7,2.9,4.2,1.3,"versicolor"
"98",6.2,2.9,4.3,1.3,"versicolor"
"99",5.1,2.5,3,1.1,"versicolor"
"100",5.7,2.8,4.1,1.3,"versicolor"
"101",6.3,3.3,6,2.5,"virginica"
"102",5.8,2.7,5.1,1.9,"virginica"
"103",7.1,3,5.9,2.1,"virginica"
"104",6.3,2.9,5.6,1.8,"virginica"
"105",6.5,3,5.8,2.2,"virginica"
"106",7.6,3,6.6,2.1,"virginica"
"107",4.9,2.5,4.5,1.7,"virginica"
"108",7.3,2.9,6.3,1.8,"virginica"
"109",6.7,2.5,5.8,1.8,"virginica"
"110",7.2,3.6,6.1,2.5,"virginica"
"111",6.5,3.2,5.1,2,"virginica"
"112",6.4,2.7,5.3,1.9,"virginica"
"113",6.8,3,5.5,2.1,"virginica"
"114",5.7,2.5,5,2,"virginica"
"115",5.8,2.8,5.1,2.4,"virginica"
"116",6.4,3.2,5.3,2.3,"virginica"
"117",6.5,3,5.5,1.8,"virginica"
"118",7.7,3.8,6.7,2.2,"virginica"
"119",7.7,2.6,6.9,2.3,"virginica"
"120",6,2.2,5,1.5,"virginica"
"121",6.9,3.2,5.7,2.3,"virginica"
"122",5.6,2.8,4.9,2,"virginica"
"123",7.7,2.8,6.7,2,"virginica"
"124",6.3,2.7,4.9,1.8,"virginica"
"125",6.7,3.3,5.7,2.1,"virginica"
"126",7.2,3.2,6,1.8,"virginica"
"127",6.2,2.8,4.8,1.8,"virginica"
"128",6.1,3,4.9,1.8,"virginica"
"129",6.4,2.8,5.6,2.1,"virginica"
"130",7.2,3,5.8,1.6,"virginica"
"131",7.4,2.8,6.1,1.9,"virginica"
"132",7.9,3.8,6.4,2,"virginica"
"133",6.4,2.8,5.6,2.2,"virginica"
"134",6.3,2.8,5.1,1.5,"virginica"
"135",6.1,2.6,5.6,1.4,"virginica"
"136",7.7,3,6.1,2.3,"virginica"
"137",6.3,3.4,5.6,2.4,"virginica"
"138",6.4,3.1,5.5,1.8,"virginica"
"139",6,3,4.8,1.8,"virginica"
"140",6.9,3.1,5.4,2.1,"virginica"
"141",6.7,3.1,5.6,2.4,"virginica"
"142",6.9,3.1,5.1,2.3,"virginica"
"143",5.8,2.7,5.1,1.9,"virginica"
"144",6.8,3.2,5.9,2.3,"virginica"
"145",6.7,3.3,5.7,2.5,"virginica"
"146",6.7,3,5.2,2.3,"virginica"
"147",6.3,2.5,5,1.9,"virginica"
"148",6.5,3,5.2,2,"virginica"
"149",6.2,3.4,5.4,2.3,"virginica"
"150",5.9,3,5.1,1.8,"virginica"
1,14.23,1.71,2.43,15.6,127,2.8,3.06,.28,2.29,5.64,1.04,3.92,1065
1,13.2,1.78,2.14,11.2,100,2.65,2.76,.26,1.28,4.38,1.05,3.4,1050
1,13.16,2.36,2.67,18.6,101,2.8,3.24,.3,2.81,5.68,1.03,3.17,1185
1,14.37,1.95,2.5,16.8,113,3.85,3.49,.24,2.18,7.8,.86,3.45,1480
1,13.24,2.59,2.87,21,118,2.8,2.69,.39,1.82,4.32,1.04,2.93,735
1,14.2,1.76,2.45,15.2,112,3.27,3.39,.34,1.97,6.75,1.05,2.85,1450
1,14.39,1.87,2.45,14.6,96,2.5,2.52,.3,1.98,5.25,1.02,3.58,1290
1,14.06,2.15,2.61,17.6,121,2.6,2.51,.31,1.25,5.05,1.06,3.58,1295
1,14.83,1.64,2.17,14,97,2.8,2.98,.29,1.98,5.2,1.08,2.85,1045
1,13.86,1.35,2.27,16,98,2.98,3.15,.22,1.85,7.22,1.01,3.55,1045
1,14.1,2.16,2.3,18,105,2.95,3.32,.22,2.38,5.75,1.25,3.17,1510
1,14.12,1.48,2.32,16.8,95,2.2,2.43,.26,1.57,5,1.17,2.82,1280
1,13.75,1.73,2.41,16,89,2.6,2.76,.29,1.81,5.6,1.15,2.9,1320
1,14.75,1.73,2.39,11.4,91,3.1,3.69,.43,2.81,5.4,1.25,2.73,1150
1,14.38,1.87,2.38,12,102,3.3,3.64,.29,2.96,7.5,1.2,3,1547
1,13.63,1.81,2.7,17.2,112,2.85,2.91,.3,1.46,7.3,1.28,2.88,1310
1,14.3,1.92,2.72,20,120,2.8,3.14,.33,1.97,6.2,1.07,2.65,1280
1,13.83,1.57,2.62,20,115,2.95,3.4,.4,1.72,6.6,1.13,2.57,1130
1,14.19,1.59,2.48,16.5,108,3.3,3.93,.32,1.86,8.7,1.23,2.82,1680
1,13.64,3.1,2.56,15.2,116,2.7,3.03,.17,1.66,5.1,.96,3.36,845
1,14.06,1.63,2.28,16,126,3,3.17,.24,2.1,5.65,1.09,3.71,780
1,12.93,3.8,2.65,18.6,102,2.41,2.41,.25,1.98,4.5,1.03,3.52,770
1,13.71,1.86,2.36,16.6,101,2.61,2.88,.27,1.69,3.8,1.11,4,1035
1,12.85,1.6,2.52,17.8,95,2.48,2.37,.26,1.46,3.93,1.09,3.63,1015
1,13.5,1.81,2.61,20,96,2.53,2.61,.28,1.66,3.52,1.12,3.82,845
1,13.05,2.05,3.22,25,124,2.63,2.68,.47,1.92,3.58,1.13,3.2,830
1,13.39,1.77,2.62,16.1,93,2.85,2.94,.34,1.45,4.8,.92,3.22,1195
1,13.3,1.72,2.14,17,94,2.4,2.19,.27,1.35,3.95,1.02,2.77,1285
1,13.87,1.9,2.8,19.4,107,2.95,2.97,.37,1.76,4.5,1.25,3.4,915
1,14.02,1.68,2.21,16,96,2.65,2.33,.26,1.98,4.7,1.04,3.59,1035
1,13.73,1.5,2.7,22.5,101,3,3.25,.29,2.38,5.7,1.19,2.71,1285
1,13.58,1.66,2.36,19.1,106,2.86,3.19,.22,1.95,6.9,1.09,2.88,1515
1,13.68,1.83,2.36,17.2,104,2.42,2.69,.42,1.97,3.84,1.23,2.87,990
1,13.76,1.53,2.7,19.5,132,2.95,2.74,.5,1.35,5.4,1.25,3,1235
1,13.51,1.8,2.65,19,110,2.35,2.53,.29,1.54,4.2,1.1,2.87,1095
1,13.48,1.81,2.41,20.5,100,2.7,2.98,.26,1.86,5.1,1.04,3.47,920
1,13.28,1.64,2.84,15.5,110,2.6,2.68,.34,1.36,4.6,1.09,2.78,880
1,13.05,1.65,2.55,18,98,2.45,2.43,.29,1.44,4.25,1.12,2.51,1105
1,13.07,1.5,2.1,15.5,98,2.4,2.64,.28,1.37,3.7,1.18,2.69,1020
1,14.22,3.99,2.51,13.2,128,3,3.04,.2,2.08,5.1,.89,3.53,760
1,13.56,1.71,2.31,16.2,117,3.15,3.29,.34,2.34,6.13,.95,3.38,795
1,13.41,3.84,2.12,18.8,90,2.45,2.68,.27,1.48,4.28,.91,3,1035
1,13.88,1.89,2.59,15,101,3.25,3.56,.17,1.7,5.43,.88,3.56,1095
1,13.24,3.98,2.29,17.5,103,2.64,2.63,.32,1.66,4.36,.82,3,680
1,13.05,1.77,2.1,17,107,3,3,.28,2.03,5.04,.88,3.35,885
1,14.21,4.04,2.44,18.9,111,2.85,2.65,.3,1.25,5.24,.87,3.33,1080
1,14.38,3.59,2.28,16,102,3.25,3.17,.27,2.19,4.9,1.04,3.44,1065
1,13.9,1.68,2.12,16,101,3.1,3.39,.21,2.14,6.1,.91,3.33,985
1,14.1,2.02,2.4,18.8,103,2.75,2.92,.32,2.38,6.2,1.07,2.75,1060
1,13.94,1.73,2.27,17.4,108,2.88,3.54,.32,2.08,8.90,1.12,3.1,1260
1,13.05,1.73,2.04,12.4,92,2.72,3.27,.17,2.91,7.2,1.12,2.91,1150
1,13.83,1.65,2.6,17.2,94,2.45,2.99,.22,2.29,5.6,1.24,3.37,1265
1,13.82,1.75,2.42,14,111,3.88,3.74,.32,1.87,7.05,1.01,3.26,1190
1,13.77,1.9,2.68,17.1,115,3,2.79,.39,1.68,6.3,1.13,2.93,1375
1,13.74,1.67,2.25,16.4,118,2.6,2.9,.21,1.62,5.85,.92,3.2,1060
1,13.56,1.73,2.46,20.5,116,2.96,2.78,.2,2.45,6.25,.98,3.03,1120
1,14.22,1.7,2.3,16.3,118,3.2,3,.26,2.03,6.38,.94,3.31,970
1,13.29,1.97,2.68,16.8,102,3,3.23,.31,1.66,6,1.07,2.84,1270
1,13.72,1.43,2.5,16.7,108,3.4,3.67,.19,2.04,6.8,.89,2.87,1285
2,12.37,.94,1.36,10.6,88,1.98,.57,.28,.42,1.95,1.05,1.82,520
2,12.33,1.1,2.28,16,101,2.05,1.09,.63,.41,3.27,1.25,1.67,680
2,12.64,1.36,2.02,16.8,100,2.02,1.41,.53,.62,5.75,.98,1.59,450
2,13.67,1.25,1.92,18,94,2.1,1.79,.32,.73,3.8,1.23,2.46,630
2,12.37,1.13,2.16,19,87,3.5,3.1,.19,1.87,4.45,1.22,2.87,420
2,12.17,1.45,2.53,19,104,1.89,1.75,.45,1.03,2.95,1.45,2.23,355
2,12.37,1.21,2.56,18.1,98,2.42,2.65,.37,2.08,4.6,1.19,2.3,678
2,13.11,1.01,1.7,15,78,2.98,3.18,.26,2.28,5.3,1.12,3.18,502
2,12.37,1.17,1.92,19.6,78,2.11,2,.27,1.04,4.68,1.12,3.48,510
2,13.34,.94,2.36,17,110,2.53,1.3,.55,.42,3.17,1.02,1.93,750
2,12.21,1.19,1.75,16.8,151,1.85,1.28,.14,2.5,2.85,1.28,3.07,718
2,12.29,1.61,2.21,20.4,103,1.1,1.02,.37,1.46,3.05,.906,1.82,870
2,13.86,1.51,2.67,25,86,2.95,2.86,.21,1.87,3.38,1.36,3.16,410
2,13.49,1.66,2.24,24,87,1.88,1.84,.27,1.03,3.74,.98,2.78,472
2,12.99,1.67,2.6,30,139,3.3,2.89,.21,1.96,3.35,1.31,3.5,985
2,11.96,1.09,2.3,21,101,3.38,2.14,.13,1.65,3.21,.99,3.13,886
2,11.66,1.88,1.92,16,97,1.61,1.57,.34,1.15,3.8,1.23,2.14,428
2,13.03,.9,1.71,16,86,1.95,2.03,.24,1.46,4.6,1.19,2.48,392
2,11.84,2.89,2.23,18,112,1.72,1.32,.43,.95,2.65,.96,2.52,500
2,12.33,.99,1.95,14.8,136,1.9,1.85,.35,2.76,3.4,1.06,2.31,750
2,12.7,3.87,2.4,23,101,2.83,2.55,.43,1.95,2.57,1.19,3.13,463
2,12,.92,2,19,86,2.42,2.26,.3,1.43,2.5,1.38,3.12,278
2,12.72,1.81,2.2,18.8,86,2.2,2.53,.26,1.77,3.9,1.16,3.14,714
2,12.08,1.13,2.51,24,78,2,1.58,.4,1.4,2.2,1.31,2.72,630
2,13.05,3.86,2.32,22.5,85,1.65,1.59,.61,1.62,4.8,.84,2.01,515
2,11.84,.89,2.58,18,94,2.2,2.21,.22,2.35,3.05,.79,3.08,520
2,12.67,.98,2.24,18,99,2.2,1.94,.3,1.46,2.62,1.23,3.16,450
2,12.16,1.61,2.31,22.8,90,1.78,1.69,.43,1.56,2.45,1.33,2.26,495
2,11.65,1.67,2.62,26,88,1.92,1.61,.4,1.34,2.6,1.36,3.21,562
2,11.64,2.06,2.46,21.6,84,1.95,1.69,.48,1.35,2.8,1,2.75,680
2,12.08,1.33,2.3,23.6,70,2.2,1.59,.42,1.38,1.74,1.07,3.21,625
2,12.08,1.83,2.32,18.5,81,1.6,1.5,.52,1.64,2.4,1.08,2.27,480
2,12,1.51,2.42,22,86,1.45,1.25,.5,1.63,3.6,1.05,2.65,450
2,12.69,1.53,2.26,20.7,80,1.38,1.46,.58,1.62,3.05,.96,2.06,495
2,12.29,2.83,2.22,18,88,2.45,2.25,.25,1.99,2.15,1.15,3.3,290
2,11.62,1.99,2.28,18,98,3.02,2.26,.17,1.35,3.25,1.16,2.96,345
2,12.47,1.52,2.2,19,162,2.5,2.27,.32,3.28,2.6,1.16,2.63,937
2,11.81,2.12,2.74,21.5,134,1.6,.99,.14,1.56,2.5,.95,2.26,625
2,12.29,1.41,1.98,16,85,2.55,2.5,.29,1.77,2.9,1.23,2.74,428
2,12.37,1.07,2.1,18.5,88,3.52,3.75,.24,1.95,4.5,1.04,2.77,660
2,12.29,3.17,2.21,18,88,2.85,2.99,.45,2.81,2.3,1.42,2.83,406
2,12.08,2.08,1.7,17.5,97,2.23,2.17,.26,1.4,3.3,1.27,2.96,710
2,12.6,1.34,1.9,18.5,88,1.45,1.36,.29,1.35,2.45,1.04,2.77,562
2,12.34,2.45,2.46,21,98,2.56,2.11,.34,1.31,2.8,.8,3.38,438
2,11.82,1.72,1.88,19.5,86,2.5,1.64,.37,1.42,2.06,.94,2.44,415
2,12.51,1.73,1.98,20.5,85,2.2,1.92,.32,1.48,2.94,1.04,3.57,672
2,12.42,2.55,2.27,22,90,1.68,1.84,.66,1.42,2.7,.86,3.3,315
2,12.25,1.73,2.12,19,80,1.65,2.03,.37,1.63,3.4,1,3.17,510
2,12.72,1.75,2.28,22.5,84,1.38,1.76,.48,1.63,3.3,.88,2.42,488
2,12.22,1.29,1.94,19,92,2.36,2.04,.39,2.08,2.7,.86,3.02,312
2,11.61,1.35,2.7,20,94,2.74,2.92,.29,2.49,2.65,.96,3.26,680
2,11.46,3.74,1.82,19.5,107,3.18,2.58,.24,3.58,2.9,.75,2.81,562
2,12.52,2.43,2.17,21,88,2.55,2.27,.26,1.22,2,.9,2.78,325
2,11.76,2.68,2.92,20,103,1.75,2.03,.6,1.05,3.8,1.23,2.5,607
2,11.41,.74,2.5,21,88,2.48,2.01,.42,1.44,3.08,1.1,2.31,434
2,12.08,1.39,2.5,22.5,84,2.56,2.29,.43,1.04,2.9,.93,3.19,385
2,11.03,1.51,2.2,21.5,85,2.46,2.17,.52,2.01,1.9,1.71,2.87,407
2,11.82,1.47,1.99,20.8,86,1.98,1.6,.3,1.53,1.95,.95,3.33,495
2,12.42,1.61,2.19,22.5,108,2,2.09,.34,1.61,2.06,1.06,2.96,345
2,12.77,3.43,1.98,16,80,1.63,1.25,.43,.83,3.4,.7,2.12,372
2,12,3.43,2,19,87,2,1.64,.37,1.87,1.28,.93,3.05,564
2,11.45,2.4,2.42,20,96,2.9,2.79,.32,1.83,3.25,.8,3.39,625
2,11.56,2.05,3.23,28.5,119,3.18,5.08,.47,1.87,6,.93,3.69,465
2,12.42,4.43,2.73,26.5,102,2.2,2.13,.43,1.71,2.08,.92,3.12,365
2,13.05,5.8,2.13,21.5,86,2.62,2.65,.3,2.01,2.6,.73,3.1,380
2,11.87,4.31,2.39,21,82,2.86,3.03,.21,2.91,2.8,.75,3.64,380
2,12.07,2.16,2.17,21,85,2.6,2.65,.37,1.35,2.76,.86,3.28,378
2,12.43,1.53,2.29,21.5,86,2.74,3.15,.39,1.77,3.94,.69,2.84,352
2,11.79,2.13,2.78,28.5,92,2.13,2.24,.58,1.76,3,.97,2.44,466
2,12.37,1.63,2.3,24.5,88,2.22,2.45,.4,1.9,2.12,.89,2.78,342
2,12.04,4.3,2.38,22,80,2.1,1.75,.42,1.35,2.6,.79,2.57,580
3,12.86,1.35,2.32,18,122,1.51,1.25,.21,.94,4.1,.76,1.29,630
3,12.88,2.99,2.4,20,104,1.3,1.22,.24,.83,5.4,.74,1.42,530
3,12.81,2.31,2.4,24,98,1.15,1.09,.27,.83,5.7,.66,1.36,560
3,12.7,3.55,2.36,21.5,106,1.7,1.2,.17,.84,5,.78,1.29,600
3,12.51,1.24,2.25,17.5,85,2,.58,.6,1.25,5.45,.75,1.51,650
3,12.6,2.46,2.2,18.5,94,1.62,.66,.63,.94,7.1,.73,1.58,695
3,12.25,4.72,2.54,21,89,1.38,.47,.53,.8,3.85,.75,1.27,720
3,12.53,5.51,2.64,25,96,1.79,.6,.63,1.1,5,.82,1.69,515
3,13.49,3.59,2.19,19.5,88,1.62,.48,.58,.88,5.7,.81,1.82,580
3,12.84,2.96,2.61,24,101,2.32,.6,.53,.81,4.92,.89,2.15,590
3,12.93,2.81,2.7,21,96,1.54,.5,.53,.75,4.6,.77,2.31,600
3,13.36,2.56,2.35,20,89,1.4,.5,.37,.64,5.6,.7,2.47,780
3,13.52,3.17,2.72,23.5,97,1.55,.52,.5,.55,4.35,.89,2.06,520
3,13.62,4.95,2.35,20,92,2,.8,.47,1.02,4.4,.91,2.05,550
3,12.25,3.88,2.2,18.5,112,1.38,.78,.29,1.14,8.21,.65,2,855
3,13.16,3.57,2.15,21,102,1.5,.55,.43,1.3,4,.6,1.68,830
3,13.88,5.04,2.23,20,80,.98,.34,.4,.68,4.9,.58,1.33,415
3,12.87,4.61,2.48,21.5,86,1.7,.65,.47,.86,7.65,.54,1.86,625
3,13.32,3.24,2.38,21.5,92,1.93,.76,.45,1.25,8.42,.55,1.62,650
3,13.08,3.9,2.36,21.5,113,1.41,1.39,.34,1.14,9.40,.57,1.33,550
3,13.5,3.12,2.62,24,123,1.4,1.57,.22,1.25,8.60,.59,1.3,500
3,12.79,2.67,2.48,22,112,1.48,1.36,.24,1.26,10.8,.48,1.47,480
3,13.11,1.9,2.75,25.5,116,2.2,1.28,.26,1.56,7.1,.61,1.33,425
3,13.23,3.3,2.28,18.5,98,1.8,.83,.61,1.87,10.52,.56,1.51,675
3,12.58,1.29,2.1,20,103,1.48,.58,.53,1.4,7.6,.58,1.55,640
3,13.17,5.19,2.32,22,93,1.74,.63,.61,1.55,7.9,.6,1.48,725
3,13.84,4.12,2.38,19.5,89,1.8,.83,.48,1.56,9.01,.57,1.64,480
3,12.45,3.03,2.64,27,97,1.9,.58,.63,1.14,7.5,.67,1.73,880
3,14.34,1.68,2.7,25,98,2.8,1.31,.53,2.7,13,.57,1.96,660
3,13.48,1.67,2.64,22.5,89,2.6,1.1,.52,2.29,11.75,.57,1.78,620
3,12.36,3.83,2.38,21,88,2.3,.92,.5,1.04,7.65,.56,1.58,520
3,13.69,3.26,2.54,20,107,1.83,.56,.5,.8,5.88,.96,1.82,680
3,12.85,3.27,2.58,22,106,1.65,.6,.6,.96,5.58,.87,2.11,570
3,12.96,3.45,2.35,18.5,106,1.39,.7,.4,.94,5.28,.68,1.75,675
3,13.78,2.76,2.3,22,90,1.35,.68,.41,1.03,9.58,.7,1.68,615
3,13.73,4.36,2.26,22.5,88,1.28,.47,.52,1.15,6.62,.78,1.75,520
3,13.45,3.7,2.6,23,111,1.7,.92,.43,1.46,10.68,.85,1.56,695
3,12.82,3.37,2.3,19.5,88,1.48,.66,.4,.97,10.26,.72,1.75,685
3,13.58,2.58,2.69,24.5,105,1.55,.84,.39,1.54,8.66,.74,1.8,750
3,13.4,4.6,2.86,25,112,1.98,.96,.27,1.11,8.5,.67,1.92,630
3,12.2,3.03,2.32,19,96,1.25,.49,.4,.73,5.5,.66,1.83,510
3,12.77,2.39,2.28,19.5,86,1.39,.51,.48,.64,9.899999,.57,1.63,470
3,14.16,2.51,2.48,20,91,1.68,.7,.44,1.24,9.7,.62,1.71,660
3,13.71,5.65,2.45,20.5,95,1.68,.61,.52,1.06,7.7,.64,1.74,740
3,13.4,3.91,2.48,23,102,1.8,.75,.43,1.41,7.3,.7,1.56,750
3,13.27,4.28,2.26,20,120,1.59,.69,.43,1.35,10.2,.59,1.56,835
3,13.17,2.59,2.37,20,120,1.65,.68,.53,1.46,9.3,.6,1.62,840
3,14.13,4.1,2.74,24.5,96,2.05,.76,.56,1.35,9.2,.61,1.6,560
This is our first project. If you have no idea how to start, ask class teacher Xiao Xu for Jerry's video walkthrough (we suggest trying to work through it on your own first; if you are still stuck, review the earlier lessons or look for more references).

Requirements:

Submission deadline: May 22, 23:59. The full solution will be explained and published this weekend.

---- Face recognition based on Kernel LDA + KNN
---- Use Kernel Discriminant Analysis for dimensionality reduction
---- Use K-Nearest-Neighbor for classification

Data:
---- The face images come from the Olivetti faces data-set from AT&T (classification)
---- The data set contains face images of 40 people, 10 images per person
---- We only use the images of the first 2 people, whose labels (label/target) are 0 and 1

Algorithm:
---- Implement Kernel Discriminant Analysis based on an RBF Kernel yourself, for dimensionality reduction of the two-class data
---- The code skeleton is already given; students fill in the fit() and transform() functions of KernelDiscriminantAnalysis

Expected result:
1. Required recognition accuracy: 100%
2. Reproduce the effect shown in the reference figure
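If you are not sure where to begin, loading the two-person subset described above could look roughly like the sketch below (the variable names are only illustrative, not part of the given skeleton):

```python
# Sketch: load the Olivetti faces and keep only the images whose label is 0 or 1.
from sklearn.datasets import fetch_olivetti_faces

faces = fetch_olivetti_faces()
mask = faces.target < 2                      # the first 2 people, labels 0 and 1
X, y = faces.data[mask], faces.target[mask]
print(X.shape, y.shape)                      # expected: (20, 4096) (20,)
```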
Both Homework and Project must be submitted through GitLab.
Video tutorial:
Link: https://pan.baidu.com/s/1HxNRvrPwOBT4xCzU1iSfwg 
Access code: toov
Text tutorial:
I. Basic git operations
1. Clone a repository
git clone url (the address of the git repository)
2. Add files that are not yet tracked
git add <file name> or . (. stands for all files)
3. Attach a message to this commit
git commit -m "your commit message goes here"
4. Push to the remote
git push
5. Pull the remote changes into an existing repository
git pull
II. Adding an SSH key
1. Generate an SSH key
    command: ssh-keygen -t rsa -C "your gitlab email address"
2. Copy the public key from your home directory
    - cd ~/.ssh
    - cat id_rsa.pub
    Paste the content into GitLab under SSH keys (click your avatar in the top-right corner -> Settings -> SSH Keys -> paste the content into the Key field; the title can be anything)
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 机器学习中的优化 "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 问题1 (30分)\n",
"假设我们有训练数据$D=\\{(\\mathbf{x}_1,y_1),...,(\\mathbf{x}_n,y_n)\\}$, 其中$(\\mathbf{x}_i,y_i)$为每一个样本,而且$\\mathbf{x}_i$是样本的特征并且$\\mathbf{x}_i\\in \\mathcal{R}^D$, $y_i$代表样本数据的标签(label), 取值为$0$或者$1$. 在逻辑回归中,模型的参数为$(\\mathbf{w},b)$。对于向量,我们一般用粗体来表达。 为了后续推导的方便,可以把b融入到参数w中。 这是参数$w$就变成 $w=(w_0, w_1, .., w_D)$,也就是前面多出了一个项$w_0$, 可以看作是b,这时候每一个$x_i$也需要稍作改变可以写成 $x_i = [1, x_i]$,前面加了一个1。稍做思考应该能看出为什么可以这么写。\n",
"\n",
"请回答以下问题。请用Markdown自带的Latex来编写。\n",
"\n",
"\n",
"\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### (a) ```编写逻辑回归的目标函数```\n",
"请写出目标函数(objective function), 也就是我们需要\"最小化\"的目标(也称之为损失函数或者loss function),不需要考虑正则。 把目标函数表示成最小化的形态,另外把$\\prod_{}^{}$转换成$\\log \\sum_{}^{}$\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"$L(w)=$"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### (b) ```求解对w的一阶导数```\n",
"为了做梯度下降法,我们需要对参数$w$求导,请把$L(w)$对$w$的梯度计算一下:"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"$\\frac{\\partial L(w)}{\\partial w}=$"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### (c) ```求解对w的二阶导数```\n",
"在上面结果的基础上对$w$求解二阶导数,也就是再求一次导数。 这个过程需要回忆一下线性代数的部分 ^^。 参考: matrix cookbook: https://www.math.uwaterloo.ca/~hwolkowi/matrixcookbook.pdf, 还有 Hessian Matrix。 "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"$\\frac{\\partial^2 L(w)}{\\partial^2 w}=$"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### (d) ```证明逻辑回归目标函数是凸函数```\n",
"试着证明逻辑回归函数是凸函数。假设一个函数是凸函数,我们则可以得出局部最优解即为全局最优解,所以假设我们通过随机梯度下降法等手段找到最优解时我们就可以确认这个解就是全局最优解。证明凸函数的方法有很多种,在这里我们介绍一种方法,就是基于二次求导大于等于0。比如给定一个函数$f(x)=x^2-3x+3$,做两次\n",
"求导之后即可以得出$f''(x)=2 > 0$,所以这个函数就是凸函数。类似的,这种理论也应用于多元变量中的函数上。在多元函数上,只要证明二阶导数是posititive semidefinite即可以。 问题(c)的结果是一个矩阵。 为了证明这个矩阵(假设为H)为Positive Semidefinite,需要证明对于任意一个非零向量$v\\in \\mathcal{R}$, 需要得出$v^{T}Hv >=0$\n",
"请写出详细的推导过程:"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"// TODO 请写下推导过程\n",
"\n",
"\n",
"\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 问题2 (20分)\n",
"证明p-norm是凸函数, p-norm的定义为:\n",
"$||x||_p=(\\sum_{i=1}^{n}|x_i|^p)^{1/p}$\n",
"\n",
"hint: Minkowski’s Inequality"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"// TODO: your proof\n",
"\n",
"\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 问题3 (20分)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"在第一次课程中,我们讲了在判定凸函数的时候用到一项技术:second-order convexity, 也就是当函数f(x)在每一个点上twice differentiable, 这时候我们就有个性质: f(x)是凸函数,当且仅当 f(x)的二阶为PSD矩阵(半正定矩阵)。 请在下方试着证明一下此理论。 "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"// your proof of second order convexity\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 问题4 (30分)\n",
"在课堂里我们讲过Transportation problem. 重新描述问题: 有两个城市北京和上海,分别拥有300件衣服和500件衣服,另外有三个城市分别是1,2,3分别需要200,300,250件衣服。现在需要把衣服从北京和上海运送到城市1,2,3。 我们假定每运输一件衣服会产生一些代价,比如:\n",
"- 北京 -> 1: 5\n",
"- 北京 -> 2: 6\n",
"- 北京 -> 3: 4\n",
"- 上海 -> 1: 6\n",
"- 上海 -> 2: 3\n",
"- 上海 -> 3: 7\n",
"\n",
"最后的值是单位cost. \n",
"\n",
"问题:我们希望最小化成本。 "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"```(a)``` 请写出linear programming formulation。 利用标准的写法(Stanford form),建议使用矩阵、向量的表示法。 "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"// your formulation\n",
"\n",
"\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"```(b)``` 利用lp solver求解最优解。 参考:\n",
"https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.linprog.html\n",
" 或者: http://cvxopt.org/"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# your implementation\n",
"\n",
"\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"```(c)```: 试着把上述LP转化成Dual formulation,请写出dual form. "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"// your formulation"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 机器学习中的优化 "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 问题1 (30分)\n",
"假设我们有训练数据$D=\\{(\\mathbf{x}_1,y_1),...,(\\mathbf{x}_n,y_n)\\}$, 其中$(\\mathbf{x}_i,y_i)$为每一个样本,而且$\\mathbf{x}_i$是样本的特征并且$\\mathbf{x}_i\\in \\mathcal{R}^D$, $y_i$代表样本数据的标签(label), 取值为$0$或者$1$. 在逻辑回归中,模型的参数为$(\\mathbf{w},b)$。对于向量,我们一般用粗体来表达。 为了后续推导的方便,可以把b融入到参数w中。 这是参数$w$就变成 $w=(w_0, w_1, .., w_D)$,也就是前面多出了一个项$w_0$, 可以看作是b,这时候每一个$x_i$也需要稍作改变可以写成 $x_i = [1, x_i]$,前面加了一个1。稍做思考应该能看出为什么可以这么写。\n",
"\n",
"请回答以下问题。请用Markdown自带的Latex来编写。\n",
"\n",
"\n",
"\n",
"\n"
]
},
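{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustration (sketch): absorbing the bias b into w by prepending a constant 1\n",
"# feature to every sample, as described above. X_demo is a made-up example.\n",
"import numpy as np\n",
"X_demo = np.array([[2.0, 3.0], [1.0, -1.0]])            # two samples, D = 2\n",
"X_aug = np.hstack([np.ones((X_demo.shape[0], 1)), X_demo])\n",
"w = np.array([0.5, 1.0, -2.0])                          # w = (w_0 = b, w_1, w_2)\n",
"b = 0.5\n",
"print(X_aug @ w)                # equals X_demo @ w[1:] + b\n",
"print(X_demo @ w[1:] + b)"
]
},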
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### (a) ```编写逻辑回归的目标函数```\n",
"请写出目标函数(objective function), 也就是我们需要\"最小化\"的目标(也称之为损失函数或者loss function),不需要考虑正则。 把目标函数表示成最小化的形态,另外把$\\prod_{}^{}$转换成$\\log \\sum_{}^{}$\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We know that \n",
"\\begin{align*}\n",
"P(Y|x) = \\frac{1}{1+e^{-w^{T}x}}\\\\\n",
"\\end{align*}\n",
"Want to find the Maximum likelihood of\n",
"\\begin{align*}\n",
"P(X|\\theta) = \\prod^{n}_{i=1} (1 - P(y_{i}=1|x_{i}))^{1-y_{i}}*P(y_{i}=1|x_{i})^{y_{i}}\n",
"\\end{align*}\n",
"\n",
"\n",
"\n",
"Taking the log,\n",
"\\begin{align}\n",
"L(w)&=\\log \\prod^{n}_{i=1} (1 - P(y_{i}=1|x_{i}))^{1-y_{i}}*P(y_{i}=1|x_{i})^{y_{i}}\\\\\n",
"&=\\sum^{n}_{i=1} \\log (1 - P(y_{i}=1|x_{i}))^{1-y_{i}}*P(y_{i}=1|x_{i})^{y_{i}}\\\\\n",
"&=\\sum^{n}_{i=1} (1-y_{i})\\log (1 - P(y_{i}=1|x_{i}))+y_{i}\\log P(y_{i}=1|x_{i})\\\\\n",
"&=\\sum^{n}_{i=1} \\log (1 - P(y_{i}=1|x_{i})) + y_{i}\\log (\\frac{P(y_{i}=1|x_{i})}{1-P(y_{i}=1|x_{i})})\\\\\n",
"&=\\sum^{n}_{i=1} -(1-y_{i})(wx_{i}) - \\log (1+e^{wx_{i}})\n",
"\\end{align}\n",
"\n",
"<!-- Hence, want to find\n",
"\\begin{align*}\n",
"\\max_{\\theta} \\sum^{n}_{i=1} \\log (1 - P(y_{i}=1|x_{i}))^{1-y_{i}}*P(y_{i}=1|x_{i})^{y_{i}}\n",
"\\end{align*}\n",
"\n",
"Subsitute $P(y_{i}=1|x_{i})$ with $\\frac{1}{1+e^{-w^{T}x}}$,\n",
"\\begin{align*}\n",
"\\max_{\\theta} \\sum^{n}_{i=1} \\log (1 - \\frac{1}{1+e^{-w^{T}x}})^{1-y_{i}}*(\\frac{1}{1+e^{-w^{T}x}})^{y_{i}}\n",
"\\end{align*}\n",
"\n",
"The likelihood function is maximized when its derivative equals zero\n",
"\\begin{align}\n",
"\\frac{d}{d\\theta} \\sum^{n}_{i=1} \\log (1 - \\frac{1}{1+e^{-w^{T}x}})^{1-y_{i}}*(\\frac{1}{1+e^{-w^{T}x}})^{y_{i}}\\\\\n",
"= \n",
"\\end{align} -->\n",
"\n",
"\n",
"\n",
"\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### (b) ```求解对w的一阶导数```\n",
"为了做梯度下降法,我们需要对参数$w$求导,请把$L(w)$对$w$的梯度计算一下:"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n",
"From last question, we know\n",
"\\begin{align}\n",
"L(w)&=\\sum^{n}_{i=1} (1-y_{i})(wx_{i}) - \\log (1+e^{wx_{i}})\n",
"\\end{align}\n",
"\n",
"Taking the derivative of L(w),\n",
"\\begin{align}\n",
"\\frac{\\partial L(w)}{\\partial w} &= \\sum^{n}_{i=1} -x_{i}(1-y_{i})+x_{i}e^{-wx_{i}} \\frac{1}{1+e^{-wx_{i}}}\\\\\n",
"&= \\sum^{n}_{i=1} x_{i}(\\frac{e^{-wx_{i}}}{1+e^{-wx_{i}}}-1+y_{i})\\\\\n",
"&= \\sum^{n}_{i=1} x_{i}(\\frac{e^{-wx_{i}}}{1+e^{-wx_{i}}}-\\frac{1+e^{-wx_{i}}}{1+e^{-wx_{i}}}+y_{i})\\\\\n",
"&= \\sum^{n}_{i=1} x_{i}(\\frac{-1}{1+e^{-wx_{i}}}+y_{i}) = \\sum^{n}_{i=1} x_{i}(y_{i} - \\frac{1}{1+e^{-wx_{i}}})\\\\\n",
"&= \\sum^{n}_{i=1} (y_{i} - \\frac{1}{1+e^{-wx_{i}}})x_{i}\\\\\n",
"&= X(Y - \\frac{1}{1+e^{-wX}})\n",
"\\end{align}"
]
},
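{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional numerical check (sketch): compare the analytic gradient sum_i (y_i - sigma(w x_i)) x_i\n",
"# with a centered finite-difference estimate of L(w) on random data. Here samples are stored as\n",
"# rows of X_chk, so the matrix form of the gradient is X_chk.T @ (y - sigma(X w)).\n",
"import numpy as np\n",
"rng = np.random.RandomState(0)\n",
"n_chk, d_chk = 50, 3\n",
"X_chk = rng.randn(n_chk, d_chk)\n",
"y_chk = rng.randint(0, 2, size=n_chk)\n",
"w_chk = rng.randn(d_chk)\n",
"sigmoid = lambda t: 1.0 / (1.0 + np.exp(-t))\n",
"L_fun = lambda w: np.sum(-(1 - y_chk) * (X_chk @ w) - np.log(1 + np.exp(-X_chk @ w)))\n",
"grad_analytic = X_chk.T @ (y_chk - sigmoid(X_chk @ w_chk))\n",
"eps = 1e-6\n",
"grad_numeric = np.array([(L_fun(w_chk + eps*e) - L_fun(w_chk - eps*e)) / (2*eps) for e in np.eye(d_chk)])\n",
"print(np.max(np.abs(grad_analytic - grad_numeric)))  # should be close to 0"
]
},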
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### (c) ```求解对w的二阶导数```\n",
"在上面结果的基础上对$w$求解二阶导数,也就是再求一次导数。 这个过程需要回忆一下线性代数的部分 ^^。 参考: matrix cookbook: https://www.math.uwaterloo.ca/~hwolkowi/matrixcookbook.pdf, 还有 Hessian Matrix。 "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n",
"Want to calculate, $\\frac{\\partial}{\\partial w_{j}\\partial w_{k}} L(w)$\n",
"\\begin{align}\n",
"\\frac{\\partial}{\\partial w_{j}\\partial w_{k}} L(w) &= \\frac{\\partial}{\\partial w_{k}} \\sum^{n}_{i=1} (y_{i} - \\frac{1}{1+e^{-wx_{i}}})x_{ij}\\\\\n",
"&= \\sum^{n}_{i=1} \\frac{\\partial}{\\partial w_{k}}(\\frac{1}{1+e^{-wx_{i}}})x_{ij}\n",
"\\end{align}\n",
"\n",
"Let calculate $\\frac{\\partial}{\\partial w_{k}}(\\frac{1}{1+e^{-wx_{i}}})$.We know that,\n",
"\\begin{align}\n",
"\\frac{\\partial}{\\partial w_{k}} \\log \\frac{1}{1+e^{-wx_{i}}} &= (1+e^{-wx_{i}})*\\frac{\\partial}{\\partial w_{k}}\\frac{1}{1+e^{-wx_{i}}}\n",
"\\end{align}\n",
"\n",
"and,\n",
"\\begin{align}\n",
"\\frac{\\partial}{\\partial w_{k}} \\log \\frac{1}{1+e^{-wx_{i}}} &=\\frac{e^{-wx_{i}}x_{ik}}{1+e^{-wx_{i}}}\\\\\n",
"&= \\frac{x_{ik}+e^{-wx_{i}}x_{ik}-x_{ik}}{1+e^{-wx_{i}}}\\\\\n",
"&= \\frac{x_{ik}+e^{-wx_{i}}x_{ik}-x_{ik}}{1+e^{-wx_{i}}}\\\\\n",
"&= \\frac{x_{ik}(1+e^{-wx_{i}})-x_{ik}}{1+e^{-wx_{i}}}\\\\\n",
"&= x_{ik}-x_{ik}\\frac{1}{1+e^{-wx_{i}}} = x_{ik}(1-\\frac{1}{1+e^{-wx_{i}}})\n",
"\\end{align}\n",
"\n",
"So,\n",
"\\begin{align}\n",
"\\frac{\\partial}{\\partial w_{k}}(\\frac{1}{1+e^{-wx_{i}}})&= \\frac {x_{ik}(1-\\frac{1}{1+e^{-wx_{i}}})}{1+e^{-wx_{i}}}\n",
"\\end{align}\n",
"\n",
"Hence,\n",
"\\begin{align}\n",
"\\sum^{n}_{i=1} \\frac{\\partial}{\\partial w_{k}}(\\frac{1}{1+e^{-wx_{i}}})x_{ij} &= \\sum^{n}_{i=1} \\frac {x_{ij}x_{ik}(1-\\frac{1}{1+e^{-wx_{i}}})}{1+e^{-wx_{i}}}\n",
"\\end{align}\n",
"\n",
"Let,\n",
"\\begin{align}\n",
"z_{j} &= {x_{1j},x_{2j},x_{3j},...,x_{nj}}\\\\\n",
"z_{k} &= {x_{1k},x_{2k},x_{3k},...,x_{nk}}\\\\\n",
"\\Sigma &= Diag(\\frac {(1-\\frac{1}{1+e^{-wx_{i}}})}{1+e^{-wx_{i}}})\n",
"\\end{align}\n",
"Thus,\n",
"\\begin{align}\n",
"\\frac{\\partial}{\\partial w_{j}\\partial w_{k}} L(w) &= z_{j}^{T}\\Sigma z_{k}\n",
"\\end{align}\n",
"\n",
"Futhermore,\n",
"\\begin{align}\n",
"\\frac{\\partial}{\\partial w\\partial w} L(w) &= X\\Sigma X^{T}\n",
"\\end{align}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### (d) ```证明逻辑回归目标函数是凸函数```\n",
"试着证明逻辑回归函数是凸函数。假设一个函数是凸函数,我们则可以得出局部最优解即为全局最优解,所以假设我们通过随机梯度下降法等手段找到最优解时我们就可以确认这个解就是全局最优解。证明凸函数的方法有很多种,在这里我们介绍一种方法,就是基于二次求导大于等于0。比如给定一个函数$f(x)=x^2-3x+3$,做两次\n",
"求导之后即可以得出$f''(x)=2 > 0$,所以这个函数就是凸函数。类似的,这种理论也应用于多元变量中的函数上。在多元函数上,只要证明二阶导数是posititive semidefinite即可以。 问题(c)的结果是一个矩阵。 为了证明这个矩阵(假设为H)为Positive Semidefinite,需要证明对于任意一个非零向量$v\\in \\mathcal{R}$, 需要得出$v^{T}Hv >=0$\n",
"请写出详细的推导过程:"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n",
"Want to show that $\\forall v \\in \\mathbb{R},v^{T}\\frac{\\partial}{\\partial w\\partial w} L(w) v \\geq 0$\\\n",
"We know,\n",
"\\begin{align}\n",
"\\frac{\\partial}{\\partial w\\partial w} L(w) = X\\Sigma X^{T}\n",
"\\end{align}\n",
"\n",
"So,\n",
"\\begin{align}\n",
"v^{T}\\frac{\\partial}{\\partial w\\partial w} L(w) v &= v^{T}X\\Sigma X^{T} v\\\\\n",
"&=v^{T}\\sum^{n}_{i=1} \\frac {x_{i}x_{i}^{T}(1-\\frac{1}{1+e^{-wx_{i}}})}{1+e^{-wx_{i}}} v\\\\\n",
"&=\\sum^{n}_{i=1} \\frac {(1-\\frac{1}{1+e^{-wx_{i}}})}{1+e^{-wx_{i}}}v^{T} x_{i}x_{i}^{T}v\\\\\n",
"&=\\sum^{n}_{i=1} \\frac {(1-\\frac{1}{1+e^{-wx_{i}}})}{1+e^{-wx_{i}}}(v^{T} x_{i})^{T}x_{i}^{T}v\\\\\n",
"&=\\sum^{n}_{i=1} \\frac {(1-\\frac{1}{1+e^{-wx_{i}}})}{1+e^{-wx_{i}}}x_{i}^{T}vx_{i}^{T}v\\\\\n",
"& \\text{Since } x_{i}^{T}v \\text{ is a scalar,}\\\\\n",
"&=\\sum^{n}_{i=1} \\frac {(1-\\frac{1}{1+e^{-wx_{i}}})}{1+e^{-wx_{i}}}(x_{i}^{T}v)^{2}\n",
"\\end{align}\n",
"\n",
"Since, $\\frac {(1-\\frac{1}{1+e^{-wx_{i}}})}{1+e^{-wx_{i}}} >=0$ and $(x_{i}^{T}v)^{2} >=0$,\n",
"\\begin{align}\n",
"v^{T}\\frac{\\partial}{\\partial w\\partial w} L(w) v&=\\sum^{n}_{i=1} \\frac {(1-\\frac{1}{1+e^{-wx_{i}}})}{1+e^{-wx_{i}}}(x_{i}^{T}v)^{2}\\\\\n",
"&\\geq 0\\\\\n",
"&\\text{Therefore, L(w) is a Convex function}\n",
"\\end{align}\n",
"\n",
"\n",
"\n"
]
},
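{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional numerical check (sketch): with samples stored as rows of X_psd, the Hessian of the\n",
"# minimization objective -L(w) is X_psd.T @ diag(sigma*(1-sigma)) @ X_psd (the matrix written\n",
"# as X*Sigma*X^T above, where samples are columns). All of its eigenvalues should be >= 0.\n",
"import numpy as np\n",
"rng = np.random.RandomState(1)\n",
"X_psd = rng.randn(40, 4)\n",
"w_psd = rng.randn(4)\n",
"s = 1.0 / (1.0 + np.exp(-X_psd @ w_psd))\n",
"H = X_psd.T @ np.diag(s * (1 - s)) @ X_psd\n",
"print(np.linalg.eigvalsh(H).min())  # smallest eigenvalue; expected >= 0 up to rounding"
]
},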
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 问题2 (20分)\n",
"证明p-norm是凸函数, p-norm的定义为:\n",
"$||x||_p=(\\sum_{i=1}^{n}|x_i|^p)^{1/p}$\n",
"\n",
"hint: Minkowski’s Inequality"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let V be a vector space.\n",
"\n",
"For $v \\in V$,We define $\\|v\\|$ as the norm of v\n",
"\n",
"By definition, a function is norm if it satisfy the following conditions:\n",
"\n",
"1. $\\forall v \\in V,\\|v\\|\\geq 0$ and $\\|v\\|=0 \\Leftrightarrow v=0$\n",
"2. $\\forall v \\in V, \\lambda \\in \\mathbb{R}:\\| \\lambda \\|\\| v \\|=\\| \\lambda v \\|$\n",
"3. $\\forall v,w \\in V:\\| v+w \\| \\leq \\| v \\| + \\| w \\|$ (Triangle inequality)\n",
"\n",
"By the second Property, we know:\n",
"\n",
"$\\| \\lambda v \\| + \\| (1-\\lambda)w \\| = \\lambda \\| v \\| + (1-\\lambda) \\| w \\|$\n",
"\n",
"Hence, $\\| \\lambda v + (1-\\lambda)w \\| \\leq \\lambda \\| v \\| + (1-\\lambda) \\| w \\|$\n",
"\n",
"By the definition of convexity, this shows that every norm is convex.\n",
"\n",
"To show that P-norm is also convex, we need to prove that it is norm, that is showing p-norm satisfy the conditions of norm.\n",
"\n",
"First, by the definition of p-norm, $||x||_p=(\\sum_{i=1}^{n}|x_i|^p)^{1/p}$, it clearly satisfies the first condition\n",
"\n",
"Second, \n",
"\\begin{align}\n",
"\\lambda \\|x\\|_{p} &= \\lambda (\\sum_{i=1}^{n}|x_i|^p)^{1/p}\\\\\n",
"&= (\\lambda^{p})^{\\frac {1}{p}} (\\sum_{i=1}^{n}|x_i|^p)^{1/p}\\\\\n",
"&=(\\sum_{i=1}^{n}\\lambda^{p}|x_i|^p)^{1/p}\\\\\n",
"&= (\\sum_{i=1}^{n}|\\lambda x_i|^p)^{1/p}\\\\\n",
"&= \\|\\lambda x\\|_{p}\n",
"\\end{align}\n",
"\n",
"Third, to show the triangle inequality, we need to use Minkowski's Inequalities which states that,\n",
"\\begin{align}\n",
"(\\sum_{i=1}^{n}|a_i + b_i|^p)^{1/p} \\leq (\\sum_{i=1}^{n}|a_i|^p)^{1/p} + (\\sum_{i=1}^{n}|b_i|^p)^{1/p},\\forall p>1 \\text{ and } a_i,b_i>0\n",
"\\end{align}\n",
"\n",
"This shows that, $\\| a+b \\|_p \\leq \\|a\\| + \\|b\\|$ \n",
"Hence, triangle inequality satisfied\n",
"\n",
"Therefore, p-norm is norm, which implis that it is also convex\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"\n"
]
},
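{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional numerical check (sketch): the p-norm obeys the convexity inequality\n",
"# ||lam*a + (1-lam)*b||_p <= lam*||a||_p + (1-lam)*||b||_p on random vectors.\n",
"import numpy as np\n",
"rng = np.random.RandomState(2)\n",
"for p in [1, 1.5, 2, 3]:\n",
"    a, b = rng.randn(10), rng.randn(10)\n",
"    lam = rng.rand()\n",
"    lhs = np.linalg.norm(lam*a + (1-lam)*b, ord=p)\n",
"    rhs = lam*np.linalg.norm(a, ord=p) + (1-lam)*np.linalg.norm(b, ord=p)\n",
"    print(p, lhs <= rhs + 1e-12)"
]
},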
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 问题3 (20分)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"在第一次课程中,我们讲了在判定凸函数的时候用到一项技术:second-order convexity, 也就是当函数f(x)在每一个点上twice differentiable, 这时候我们就有个性质: f(x)是凸函数,当且仅当 f(x)的二阶为PSD矩阵(半正定矩阵)。 请在下方试着证明一下此理论。 "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"// your proof of second order convexity\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 问题4 (30分)\n",
"在课堂里我们讲过Transportation problem. 重新描述问题: 有两个城市北京和上海,分别拥有300件衣服和500件衣服,另外有三个城市分别是1,2,3分别需要200,300,250件衣服。现在需要把衣服从北京和上海运送到城市1,2,3。 我们假定每运输一件衣服会产生一些代价,比如:\n",
"- 北京 -> 1: 5\n",
"- 北京 -> 2: 6\n",
"- 北京 -> 3: 4\n",
"- 上海 -> 1: 6\n",
"- 上海 -> 2: 3\n",
"- 上海 -> 3: 7\n",
"\n",
"最后的值是单位cost. \n",
"\n",
"问题:我们希望最小化成本。 "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"```(a)``` 请写出linear programming formulation。 利用标准的写法(Stanford form),建议使用矩阵、向量的表示法。 "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"// your formulation\n",
"\n",
"\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"```(b)``` 利用lp solver求解最优解。 参考:\n",
"https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.linprog.html\n",
" 或者: http://cvxopt.org/"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" fun: 3050.0\n",
" message: 'Optimization terminated successfully.'\n",
" nit: 6\n",
" slack: array([ 0., 50.])\n",
" status: 0\n",
" success: True\n",
" x: array([ 50., 150., 0., 300., 250., 0.])\n",
" fun: 3050.0\n",
" message: 'Optimization terminated successfully.'\n",
" nit: 6\n",
" slack: array([ 0., 50.])\n",
" status: 0\n",
" success: True\n",
" x: array([ 50., 150., 0., 300., 250., 0.])\n"
]
}
],
"source": [
"# your implementation\n",
"# scipy.optimize.linprog\n",
"from scipy.optimize import linprog as lp\n",
"import numpy as np\n",
"\n",
"c = np.array([5,6,6,3,4,7],dtype = np.float)\n",
"A_ieq = np.array([[1,0,1,0,1,0],[0,1,0,1,0,1]],dtype = np.float)\n",
"b_ieq = np.array([300,500],dtype=np.float)\n",
"A_eq = np.array([[1,1,0,0,0,0],[0,0,1,1,0,0],[0,0,0,0,1,1]],dtype = np.float)\n",
"b_eq = np.array([200,300,250],dtype = np.float)\n",
"res = lp(c = c,A_ub = A_ieq,b_ub = b_ieq, A_eq = A_eq, b_eq = b_eq)\n",
"print (res)\n",
"print (lp(c = c,A_ub = A_ieq,b_ub = b_ieq, A_eq = A_eq, b_eq = b_eq,method=\"simplex\"))\n",
"\n",
"\n",
"\n",
"\n",
"\n"
]
},
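{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sanity check (sketch): recompute the objective value of the solution reported above,\n",
"# x = [50, 150, 0, 300, 250, 0], against c, and confirm the constraints hold.\n",
"x_opt = np.array([50., 150., 0., 300., 250., 0.])\n",
"print(c @ x_opt)             # 3050.0, matching res.fun\n",
"print(A_eq @ x_opt, b_eq)    # the three demands are met exactly\n",
"print(A_ieq @ x_opt, b_ieq)  # supplies of 300 and 500 are not exceeded"
]
},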
{
"cell_type": "markdown",
"metadata": {},
"source": [
"```(c)```: 试着把上述LP转化成Dual formulation,请写出dual form. "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"// your formulation"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "venv",
"language": "python",
"name": "venv"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}