20200318029 / ml2_MiniAssignments
Commit 7066ba78 authored Aug 04, 2020 by 20200318029
homework7

parent 4af6d4e1
Showing 3 changed files with 747 additions and 13 deletions (+747 -13):

homework7/A Contextual-Bandit Approach to Personalized News Article Recommendation.xml  +0 -0
homework7/LinUCB-Homework.ipynb  +426 -13
homework7/LinUCB-Homework.py  +321 -0
homework7/A Contextual-Bandit Approach to Personalized News Article Recommendation.xml  View file @ 7066ba78

This source diff could not be displayed because it is too large. You can view the blob instead.
homework7/LinUCB-Homework.ipynb  View file @ 7066ba78
@@ -60,11 +60,11 @@
     "    ucb - upper confidence bound\n",
     "    \"\"\"\n",
     "    def getUCB(self, x):\n",
-    "        Ainv = \n",
-    "        x = \n",
-    "        self.thetaHat = \n",
-    "        self.stdev = \n",
-    "        self.ucb = \n",
+    "        Ainv = np.linalg.inv(self.A)\n",
+    "        # x = \n",
+    "        self.thetaHat = np.matmul(Ainv, self.b)\n",
+    "        self.stdev = np.sqrt(np.matmul(np.matmul(x.T, Ainv), x))\n",
+    "        self.ucb = np.matmul(self.thetaHat.T, x) + self.alpha * self.stdev\n",
     "        return self.ucb[0][0]\n",
     "    \n",
     "    \"\"\"\n",
@@ -76,9 +76,9 @@
     "    x - context vector (1 x d)\n",
     "    \"\"\"\n",
     "    def update_arm(self, reward, x):\n",
-    "        x = \n",
-    "        self.A += \n",
-    "        self.b += \n",
+    "        # x = \n",
+    "        self.A += np.matmul(x, x.T)\n",
+    "        self.b += reward * x\n",
     "        return None\n",
     "    \n",
     "    \"\"\"\n",
@@ -90,11 +90,11 @@
     "    \"\"\"\n",
     "    def update_alpha(self, method=1, t=None):\n",
     "        if method == 1:\n",
-    "            self.alpha = \n",
+    "            self.alpha = 0.1 / np.sqrt(t + 1)\n",
     "        elif method == 2:\n",
-    "            self.alpha = \n",
+    "            self.alpha = 0.01 / np.power(t + 1, 0.25)\n",
     "        elif method == 3:\n",
-    "            self.alpha = \n",
+    "            self.alpha = 10 / (np.sqrt(t + 1) + 2 * t)\n",
     "        return None\n",
     "    "
   ]
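Read together, the filled-in lines above are the per-arm LinUCB computation: thetaHat = A^-1 b, a confidence width sqrt(x^T A^-1 x), the score ucb = thetaHat^T x + alpha * width, and the rank-one updates A += x x^T and b += reward * x. The short sketch below reproduces those quantities for a single arm on a toy context vector; the dimension, alpha, and context values are illustrative assumptions, not taken from the homework data.

# Minimal single-arm sketch of the quantities filled in above (toy values, not homework data).
import numpy as np

d = 5                                   # context dimension (illustrative)
alpha = 0.1                             # exploration weight (illustrative)
A = np.identity(d)                      # design matrix, initialized to I_d
b = np.zeros((d, 1))                    # reward-weighted context sum

x = np.arange(1.0, d + 1.0)[:, None]    # toy context vector, shape (d, 1)

Ainv = np.linalg.inv(A)
thetaHat = np.matmul(Ainv, b)                           # ridge-regression estimate
stdev = np.sqrt(np.matmul(np.matmul(x.T, Ainv), x))     # confidence width
ucb = np.matmul(thetaHat.T, x) + alpha * stdev          # upper confidence bound
print(ucb[0][0])

reward = 1                              # pretend this arm matched the logged action
A += np.matmul(x, x.T)                  # rank-one design-matrix update
b += reward * x                         # reward-weighted vector update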
@@ -439,10 +439,423 @@
    },
    {
     "cell_type": "code",
-    "execution_count": null,
+    "execution_count": 1,
     "metadata": {},
     "outputs": [],
-    "source": []
+    "source": [
+     "from utils import getData, getContext, getAction\n",
+     "file = \"classification.txt\"\n",
+     "data = getData(file)"
+    ]
    },
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
     {
      "data": {
       "text/html": [ "... (HTML rendering of the same 10000 rows × 102 columns DataFrame as the text/plain output below; omitted) ..." ],
"text/plain": [
" 0 1 2 3 4 5 6 7 8 9 ... 92 93 94 \\\n",
"0 5 NaN 5 0 0 37 6 0 0 0 ... 0 0 1 \n",
"1 7 NaN 1 3 36 0 0 0 0 0 ... 0 0 3 \n",
"2 6 NaN 3 0 0 0 0 0 0 0 ... 0 0 0 \n",
"3 9 NaN 77 0 0 13 0 0 0 0 ... 0 5 0 \n",
"4 2 NaN 43 0 2 44 1 0 0 0 ... 0 0 0 \n",
"... ... ... ... ... ... ... ... ... ... ... ... ... ... ... \n",
"9995 7 NaN 0 3 29 0 0 0 0 0 ... 0 0 4 \n",
"9996 7 NaN 1 0 29 0 0 0 0 0 ... 0 0 7 \n",
"9997 9 NaN 79 0 0 17 0 0 0 0 ... 0 4 0 \n",
"9998 2 NaN 29 0 2 44 0 0 0 0 ... 2 0 0 \n",
"9999 7 NaN 0 2 35 0 0 0 0 0 ... 0 0 6 \n",
"\n",
" 95 96 97 98 99 100 101 \n",
"0 3 0 0 17 30 4 0 \n",
"1 0 1 5 1 0 0 10 \n",
"2 0 23 0 11 3 0 0 \n",
"3 0 0 2 4 0 0 0 \n",
"4 0 0 0 0 13 0 0 \n",
"... ... ... ... ... ... ... ... \n",
"9995 0 5 8 1 0 0 9 \n",
"9996 0 2 2 0 0 0 13 \n",
"9997 1 0 3 9 0 0 0 \n",
"9998 0 0 0 0 17 0 0 \n",
"9999 0 0 6 0 0 0 12 \n",
"\n",
"[10000 rows x 102 columns]"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"2 1.0\n",
"3 3.0\n",
"4 36.0\n",
"5 0.0\n",
"6 0.0\n",
" ... \n",
"97 5.0\n",
"98 1.0\n",
"99 0.0\n",
"100 0.0\n",
"101 10.0\n",
"Name: 1, Length: 100, dtype: float64"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"getContext(data, 1)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"7"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"getAction(data, 1)"
]
}
],
"metadata": {
...
...
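The new cells above call three helpers from the course-provided utils module, which is not included in this commit. Judging only from the outputs shown (a 10000 × 102 DataFrame, a length-100 context Series indexed 2 through 101 for row 1, and the integer action 7), the helpers plausibly behave as in the following sketch; this is an assumption for illustration, not the actual utils.py.

# Hypothetical sketch of the utils helpers used above, inferred from the printed outputs.
# utils.py is not part of this commit, so treat every detail here as an assumption.
import pandas as pd

def getData(file):
    # classification.txt loads into a 10000 x 102 frame; whitespace separation is a guess
    return pd.read_csv(file, sep=r"\s+", header=None)

def getContext(data, t):
    # columns 2..101 of row t appear to form the 100-dimensional context vector
    return data.iloc[t, 2:]

def getAction(data, t):
    # column 0 of row t appears to hold the logged action / arm id
    return int(data.iloc[t, 0])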
homework7/LinUCB-Homework.py  0 → 100644  View file @ 7066ba78
#!/usr/bin/env python
# coding: utf-8

# In[16]:


import numpy as np
import math
import random


# In[17]:


"""
Class UCBArm creates the LinUCB K arms of our contextual bandit.
Via the LinUCB1 algorithm, our multi-arm bandit
learns to optimize the cumulative take-rate (CTR) and minimize
regret log-linearly. Through this optimization our bandit learns
in an on-line manner by sequentially updating bandit arms
based on an observed reward given a new context vector at each
time step.
"""
class UCBArm(object):

    """
    Initialization:
        At time step 0 every arm is initialized with a d x d
        design matrix 'A' set to the identity matrix and a
        'b' vector set to zeros.
    @param:
        id - unique arm id (1...K) for each arm
        d - length of context vector
        alpha - exploration rate (scales the confidence bound)
    """
    def __init__(self, id, d, alpha):
        self.id = id
        self.d = d
        self.alpha = alpha
        # Li lines 5-6
        self.A = np.identity(self.d)
        self.b = np.zeros((self.d, 1))

    """
    getUCB:
        Calculates the UCB for a given context vector.
        Assumes the expected payoff is linear in the
        d-dimensional feature vector. Across all the arms
        of the bandit this amounts to ridge regression:
        each arm scores the context with its own estimate,
        and the arm with the highest UCB is played.
    @param:
        x - context vector (1 x d)
    @return:
        ucb - upper confidence bound
    """
    def getUCB(self, x):
        Ainv = np.linalg.inv(self.A)
        x = x.values[:, None]
        self.thetaHat = np.matmul(Ainv, self.b)
        self.stdev = np.sqrt(np.matmul(np.matmul(x.T, Ainv), x))
        self.ucb = np.matmul(self.thetaHat.T, x) + self.alpha * self.stdev
        return self.ucb[0][0]

    """
    update_arm:
        Updates an arm's 'A' matrix and 'b' vector
        based on the observed reward and context vector.
    @param:
        reward - reward for the predicted action
        x - context vector (1 x d)
    """
    def update_arm(self, reward, x):
        x = x.values[:, None]
        self.A += np.matmul(x, x.T)
        self.b += reward * x
        return None

    """
    update_alpha:
        Used to update alpha during the training process.
    @param:
        method - alpha update rule
        t - None (default); use the time step to update alpha (e.g. alpha/t)
    """
    def update_alpha(self, method=1, t=None):
        if method == 1:
            self.alpha = 0.1 / np.sqrt(t + 1)
        elif method == 2:
            self.alpha = 0.1
        elif method == 3:
            self.alpha = 10 / (np.sqrt(t + 1) + 2 * t)
        return None
# In[18]:


"""
Class LinUCB implements Li's LinUCB algorithm [1]
with linear disjoint models for K-arm contextual
bandits.
"""
class LinUCB(object):

    """
    Initialization:
        Creates a bandit and initializes its K arms.
    @param:
        alpha - exploration rate
        d - length of context vector
        k - number of arms
        arms: dictionary of UCBArms. Basically contains Da and ca
              (and consequently Aa) from the original paper
    """
    def __init__(self, alpha, d, k):
        self.alpha = alpha
        self.d = d        # 100
        self.nArms = k    # 10
        self.arms = self.init_arms()

    """
    init_arms:
        Initializes nArms arms of class UCBArm.
    @return:
        arms_dict - dictionary of arms of class UCBArm
    """
    def init_arms(self):
        arms_dict = {}
        for id in range(1, self.nArms + 1):
            arms_dict[id] = UCBArm(id, self.d, self.alpha)
        return arms_dict

    """
    get_ucbs:
        Calculates the UCB for every arm.
    @param:
        x - context vector
    @return:
        ucbs - dictionary mapping arm id -> ucb
    """
    def get_ucbs(self, x):
        ucbs = {}
        for arm in self.arms:
            ucbs[arm] = self.arms[arm].getUCB(x)
        return ucbs

    """
    choose_arm:
        Returns the id of the arm with the maximum UCB. Breaks ties
        uniformly at random.
    @param:
        ucbs - dictionary of ucbs for all arms
    @return:
        arm_id - id of arm with max ucb
    """
    def choose_arm(self, ucbs):
        max_ucb = -1
        max_ucb_ids = set()
        # find the maximum ucb, then collect every arm that attains it
        for _, ucb in ucbs.items():
            if max_ucb < ucb:
                max_ucb = ucb
        for arm, ucb in ucbs.items():
            if ucb == max_ucb:
                max_ucb_ids.add(arm)
        if len(max_ucb_ids) > 1:
            return random.sample(list(max_ucb_ids), 1)[0]
        else:
            return list(max_ucb_ids)[0]

    """
    get_reward:
        Returns 1 if the predicted 'arm' equals the true action,
        else 0.
    @param:
        arm - predicted action/arm for the context
        action - true observed action for the context
    @return:
        reward - 1 if arm == action, else 0
    """
    def get_reward(self, arm, action):
        if arm == action:
            return 1
        return 0

    """
    predict:
        Helper function that calls the functions above
        to predict an action for a given context vector.
    @param:
        x - context vector
    @return:
        pred_act - predicted action (arm id)
    """
    def predict(self, x):
        ucbs = self.get_ucbs(x)
        pred_act = self.choose_arm(ucbs)
        return pred_act
# In[19]:


"""
Class bandit_evaluator implements unbiased offline evaluation
of our multi-arm contextual bandit following
Li, Chu, et al. [2]. At each time step (2...T)
we use the algorithm from step t-1 to predict the action for
the context vector observed at t. We evaluate our bandit's
cumulative take-rate over time.
"""
class bandit_evaluator(object):

    """
    Initialization:
        Creates an evaluator object to store the bandit history
        and calculate the CTR.
        bandits: list storing our trained bandit history
        cum_rewards: cumulative rewards earned
        ctr_history: CTR history
    """
    def __init__(self):
        self.bandits = []
        self.cum_rewards = 0
        self.ctr_history = []

    """
    calc_ctr:
        Makes a prediction for the newly observed context at time t
        using the t-1 bandit, collects the reward, then calculates
        the CTR.
    @param:
        x - context vector at time t
        action - true action for x
        t - current time step
    @return:
        ctr - cumulative take-rate
    """
    def calc_ctr(self, x, action, t):
        assert t > 0
        bandit = self.bandits[-1]    # most recently stored bandit, i.e. the one from step t-1
        pred_act = bandit.predict(x)
        # one reasonable reading of the original ### todo: count a hit when the stored
        # bandit's prediction matches the logged action and report the running hit rate
        self.cum_rewards += 1 if pred_act == action else 0
        ctr = self.cum_rewards / t
        self.ctr_history.append(ctr)
        return ctr
# In[20]:


from utils import getData, getContext, getAction

"""
train:
    Main driver function that implements LinUCB1
    and trains our multi-arm contextual bandit.
@param:
    file - data file to use (see readme for an example)
    steps - number of time steps (i.e. total observations in the data)
    alpha - exploration rate
    nArms - number of bandit arms (K in the paper)
    d - dimension of the context vector
@return:
    ctr_history - cumulative take-rate history
"""
def train(file, steps, alpha, nArms, d):
    # read in data
    data = getData(file)

    # initialize K-arm bandit
    bandit = LinUCB(alpha, d, nArms)

    # initialize bandit evaluator
    evaluator = bandit_evaluator()

    for t in range(steps):
        x = getContext(data, t)
        action = getAction(data, t)
        arm = bandit.predict(x)
        reward = bandit.get_reward(arm, action)
        bandit.arms[arm].update_arm(reward, x)
        if t > 0:
            # explore various alpha update methods to improve CTR
            bandit.arms[arm].update_alpha(method=2)
            # bandit.arms[arm].update_alpha(3, t)
        if t > 0:
            # evaluate the current bandit algorithm
            evaluator.bandits.append(bandit)
            ctr = evaluator.calc_ctr(x, action, t)
        if t > 0 and t % 100 == 0:
            print("Step:", t, end="")
            print(" | CTR: {0:.02f}%".format(ctr))

    return evaluator.ctr_history
# In[21]:


file = "classification.txt"
steps = 10000
alpha = 0.1
nArms = 10
dim = 100
ctr_history = train(file, steps, alpha, nArms, dim)


# In[23]:


# diagnostics
print(ctr_history)


# In[1]:
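The final cells only print ctr_history, so a natural follow-up diagnostic is to plot how the cumulative take-rate evolves over the 10000 steps. The snippet below is a hypothetical example of such a plot; matplotlib is assumed to be available and is not used anywhere in this commit.

# Hypothetical diagnostic: plot the CTR history returned by train().
# Assumes matplotlib is installed; nothing in the commit depends on it.
import matplotlib.pyplot as plt

def plot_ctr(ctr_history):
    plt.plot(range(1, len(ctr_history) + 1), ctr_history)
    plt.xlabel("time step t")
    plt.ylabel("cumulative take-rate")
    plt.title("LinUCB offline evaluation")
    plt.show()

# e.g. plot_ctr(ctr_history) after running the training cell above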