20200318029 / ml2_MiniAssignments
Commit 7066ba78 authored Aug 04, 2020 by 20200318029
homework7

parent 4af6d4e1
Showing 3 changed files with 747 additions and 13 deletions (+747 -13):

homework7/A Contextual-Bandit Approach to Personalized News Article Recommendation.xml  +0 -0
homework7/LinUCB-Homework.ipynb  +426 -13
homework7/LinUCB-Homework.py  +321 -0
homework7/A Contextual-Bandit Approach to Personalized News Article Recommendation.xml  View file @ 7066ba78

This source diff could not be displayed because it is too large. You can view the blob instead.
homework7/LinUCB-Homework.ipynb  View file @ 7066ba78
@@ -60,11 +60,11 @@
     "    ucb - upper confidence bound\n",
     "    \"\"\"\n",
     "    def getUCB(self, x):\n",
-    "        Ainv = \n",
-    "        x = \n",
-    "        self.thetaHat = \n",
-    "        self.stdev = \n",
-    "        self.ucb = \n",
+    "        Ainv = np.linalg.inv(self.A)\n",
+    "        # x = \n",
+    "        self.thetaHat = np.matmul(Ainv, self.b)\n",
+    "        self.stdev = np.sqrt(np.matmul(np.matmul(x.T, Ainv), x))\n",
+    "        self.ucb = np.matmul(self.thetaHat.T, x) + self.alpha * self.stdev\n",
     "        return self.ucb[0][0]\n",
     "    \n",
     "    \"\"\"\n",
@@ -76,9 +76,9 @@
     "    x - context vector (1 x d)\n",
     "    \"\"\"\n",
     "    def update_arm(self, reward, x):\n",
-    "        x = \n",
-    "        self.A += \n",
-    "        self.b += \n",
+    "        # x = \n",
+    "        self.A += np.matmul(x, x.T)\n",
+    "        self.b += reward * x\n",
     "        return None\n",
     "    \n",
     "    \"\"\"\n",
@@ -90,11 +90,11 @@
     "    \"\"\"\n",
     "    def update_alpha(self, method=1, t=None):\n",
     "        if method == 1:\n",
-    "            self.alpha = \n",
+    "            self.alpha = 0.1 / np.sqrt(t + 1)\n",
     "        elif method == 2:\n",
-    "            self.alpha = \n",
+    "            self.alpha = 0.01 / np.power(t + 1, 0.25)\n",
     "        elif method == 3:\n",
-    "            self.alpha = \n",
+    "            self.alpha = 10 / (np.sqrt(t + 1) + 2 * t)\n",
     "        return None\n",
     "    "
   ]
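Read together, the filled-in lines above are the per-arm LinUCB computation: thetaHat = A^-1 b, a confidence width sqrt(x^T A^-1 x), the score ucb = thetaHat^T x + alpha * width, and the rank-one updates A += x x^T and b += reward * x. The short sketch below reproduces those quantities for a single arm on a toy context vector; the dimension, alpha, and context values are illustrative assumptions, not taken from the homework data.

# Minimal single-arm sketch of the quantities filled in above (toy values, not homework data).
import numpy as np

d = 5                                   # context dimension (illustrative)
alpha = 0.1                             # exploration weight (illustrative)
A = np.identity(d)                      # design matrix, initialized to I_d
b = np.zeros((d, 1))                    # reward-weighted context sum

x = np.arange(1.0, d + 1.0)[:, None]    # toy context vector, shape (d, 1)

Ainv = np.linalg.inv(A)
thetaHat = np.matmul(Ainv, b)                           # ridge-regression estimate
stdev = np.sqrt(np.matmul(np.matmul(x.T, Ainv), x))     # confidence width
ucb = np.matmul(thetaHat.T, x) + alpha * stdev          # upper confidence bound
print(ucb[0][0])

reward = 1                              # pretend this arm matched the logged action
A += np.matmul(x, x.T)                  # rank-one design-matrix update
b += reward * x                         # reward-weighted vector update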
@@ -439,10 +439,423 @@
    },
    {
     "cell_type": "code",
-    "execution_count": null,
+    "execution_count": 1,
     "metadata": {},
     "outputs": [],
-    "source": []
+    "source": [
+     "from utils import getData, getContext, getAction\n",
+     "file = \"classification.txt\"\n",
+     "data = getData(file)"
+    ]
    },
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
     {
      "data": {
       "text/html": [ "... (HTML rendering of the same 10000 rows × 102 columns DataFrame as the text/plain output below; omitted) ..." ],
"text/plain": [
" 0 1 2 3 4 5 6 7 8 9 ... 92 93 94 \\\n",
"0 5 NaN 5 0 0 37 6 0 0 0 ... 0 0 1 \n",
"1 7 NaN 1 3 36 0 0 0 0 0 ... 0 0 3 \n",
"2 6 NaN 3 0 0 0 0 0 0 0 ... 0 0 0 \n",
"3 9 NaN 77 0 0 13 0 0 0 0 ... 0 5 0 \n",
"4 2 NaN 43 0 2 44 1 0 0 0 ... 0 0 0 \n",
"... ... ... ... ... ... ... ... ... ... ... ... ... ... ... \n",
"9995 7 NaN 0 3 29 0 0 0 0 0 ... 0 0 4 \n",
"9996 7 NaN 1 0 29 0 0 0 0 0 ... 0 0 7 \n",
"9997 9 NaN 79 0 0 17 0 0 0 0 ... 0 4 0 \n",
"9998 2 NaN 29 0 2 44 0 0 0 0 ... 2 0 0 \n",
"9999 7 NaN 0 2 35 0 0 0 0 0 ... 0 0 6 \n",
"\n",
" 95 96 97 98 99 100 101 \n",
"0 3 0 0 17 30 4 0 \n",
"1 0 1 5 1 0 0 10 \n",
"2 0 23 0 11 3 0 0 \n",
"3 0 0 2 4 0 0 0 \n",
"4 0 0 0 0 13 0 0 \n",
"... ... ... ... ... ... ... ... \n",
"9995 0 5 8 1 0 0 9 \n",
"9996 0 2 2 0 0 0 13 \n",
"9997 1 0 3 9 0 0 0 \n",
"9998 0 0 0 0 17 0 0 \n",
"9999 0 0 6 0 0 0 12 \n",
"\n",
"[10000 rows x 102 columns]"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"2 1.0\n",
"3 3.0\n",
"4 36.0\n",
"5 0.0\n",
"6 0.0\n",
" ... \n",
"97 5.0\n",
"98 1.0\n",
"99 0.0\n",
"100 0.0\n",
"101 10.0\n",
"Name: 1, Length: 100, dtype: float64"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"getContext(data, 1)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"7"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"getAction(data, 1)"
]
}
],
"metadata": {
...
...
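The new cells above call three helpers from the course-provided utils module, which is not included in this commit. Judging only from the outputs shown (a 10000 × 102 DataFrame, a length-100 context Series indexed 2 through 101 for row 1, and the integer action 7), the helpers plausibly behave as in the following sketch; this is an assumption for illustration, not the actual utils.py.

# Hypothetical sketch of the utils helpers used above, inferred from the printed outputs.
# utils.py is not part of this commit, so treat every detail here as an assumption.
import pandas as pd

def getData(file):
    # classification.txt loads into a 10000 x 102 frame; whitespace separation is a guess
    return pd.read_csv(file, sep=r"\s+", header=None)

def getContext(data, t):
    # columns 2..101 of row t appear to form the 100-dimensional context vector
    return data.iloc[t, 2:]

def getAction(data, t):
    # column 0 of row t appears to hold the logged action / arm id
    return int(data.iloc[t, 0])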
homework7/LinUCB-Homework.py  0 → 100644  View file @ 7066ba78
#!/usr/bin/env python
# coding: utf-8

# In[16]:


import numpy as np
import math
import random


# In[17]:


"""
Class UCBArm creates the LinUCB K arms of our contextual bandit.
Via the LinUCB1 algorithm, our multi-arm bandit
learns to optimize the cumulative take-rate (CTR) and minimize
regret log-linearly. Through this optimization our bandit learns
in an on-line manner by sequentially updating bandit arms
based on an observed reward given a new context vector at each
time step.
"""
class UCBArm(object):

    """
    Initialization:
        At time step 0 every arm is initialized with a d x d
        design matrix 'A' set to the identity matrix and a
        'b' vector set to zeros.
    @param:
        id - unique arm id (1...K) for each arm
        d - length of context vector
        alpha - exploration rate (scales the confidence bound)
    """
    def __init__(self, id, d, alpha):
        self.id = id
        self.d = d
        self.alpha = alpha
        # Li lines 5-6
        self.A = np.identity(self.d)
        self.b = np.zeros((self.d, 1))

    """
    getUCB:
        Calculates the UCB for a given context vector.
        Assumes the expected payoff is linear in the
        d-dimensional feature vector. Across all the arms
        of the bandit this amounts to ridge regression:
        each arm scores the context with its own estimate,
        and the arm with the highest UCB is played.
    @param:
        x - context vector (1 x d)
    @return:
        ucb - upper confidence bound
    """
    def getUCB(self, x):
        Ainv = np.linalg.inv(self.A)
        x = x.values[:, None]
        self.thetaHat = np.matmul(Ainv, self.b)
        self.stdev = np.sqrt(np.matmul(np.matmul(x.T, Ainv), x))
        self.ucb = np.matmul(self.thetaHat.T, x) + self.alpha * self.stdev
        return self.ucb[0][0]

    """
    update_arm:
        Updates an arm's 'A' matrix and 'b' vector
        based on the observed reward and context vector.
    @param:
        reward - reward for the predicted action
        x - context vector (1 x d)
    """
    def update_arm(self, reward, x):
        x = x.values[:, None]
        self.A += np.matmul(x, x.T)
        self.b += reward * x
        return None

    """
    update_alpha:
        Used to update alpha during the training process.
    @param:
        method - alpha update rule
        t - None (default); use the time step to update alpha (e.g. alpha/t)
    """
    def update_alpha(self, method=1, t=None):
        if method == 1:
            self.alpha = 0.1 / np.sqrt(t + 1)
        elif method == 2:
            self.alpha = 0.1
        elif method == 3:
            self.alpha = 10 / (np.sqrt(t + 1) + 2 * t)
        return None
# In[18]:


"""
Class LinUCB implements Li's LinUCB algorithm [1]
with linear disjoint models for K-arm contextual
bandits.
"""
class LinUCB(object):

    """
    Initialization:
        Creates a bandit and initializes its K arms.
    @param:
        alpha - exploration rate
        d - length of context vector
        k - number of arms
        arms: dictionary of UCBArms. Basically contains Da and ca
              (and consequently Aa) from the original paper
    """
    def __init__(self, alpha, d, k):
        self.alpha = alpha
        self.d = d        # 100
        self.nArms = k    # 10
        self.arms = self.init_arms()

    """
    init_arms:
        Initializes nArms arms of class UCBArm.
    @return:
        arms_dict - dictionary of arms of class UCBArm
    """
    def init_arms(self):
        arms_dict = {}
        for id in range(1, self.nArms + 1):
            arms_dict[id] = UCBArm(id, self.d, self.alpha)
        return arms_dict

    """
    get_ucbs:
        Calculates the UCB for every arm.
    @param:
        x - context vector
    @return:
        ucbs - dictionary mapping arm id -> ucb
    """
    def get_ucbs(self, x):
        ucbs = {}
        for arm in self.arms:
            ucbs[arm] = self.arms[arm].getUCB(x)
        return ucbs

    """
    choose_arm:
        Returns the id of the arm with the maximum UCB. Breaks ties
        uniformly at random.
    @param:
        ucbs - dictionary of ucbs for all arms
    @return:
        arm_id - id of arm with max ucb
    """
    def choose_arm(self, ucbs):
        max_ucb = -1
        max_ucb_ids = set()
        # find the maximum ucb, then collect every arm that attains it
        for _, ucb in ucbs.items():
            if max_ucb < ucb:
                max_ucb = ucb
        for arm, ucb in ucbs.items():
            if ucb == max_ucb:
                max_ucb_ids.add(arm)
        if len(max_ucb_ids) > 1:
            return random.sample(list(max_ucb_ids), 1)[0]
        else:
            return list(max_ucb_ids)[0]

    """
    get_reward:
        Returns 1 if the predicted 'arm' equals the true action,
        else 0.
    @param:
        arm - predicted action/arm for the context
        action - true observed action for the context
    @return:
        reward - 1 if arm == action, else 0
    """
    def get_reward(self, arm, action):
        if arm == action:
            return 1
        return 0

    """
    predict:
        Helper function that calls the functions above
        to predict an action for a given context vector.
    @param:
        x - context vector
    @return:
        pred_act - predicted action (arm id)
    """
    def predict(self, x):
        ucbs = self.get_ucbs(x)
        pred_act = self.choose_arm(ucbs)
        return pred_act
# In[19]:


"""
Class bandit_evaluator implements unbiased offline evaluation
of our multi-arm contextual bandit following
Li, Chu, et al. [2]. At each time step (2...T)
we use the algorithm from step t-1 to predict the action for
the context vector observed at t. We evaluate our bandit's
cumulative take-rate over time.
"""
class bandit_evaluator(object):

    """
    Initialization:
        Creates an evaluator object to store the bandit history
        and calculate the CTR.
        bandits: list storing our trained bandit history
        cum_rewards: cumulative rewards earned
        ctr_history: CTR history
    """
    def __init__(self):
        self.bandits = []
        self.cum_rewards = 0
        self.ctr_history = []

    """
    calc_ctr:
        Makes a prediction for the newly observed context at time t
        using the t-1 bandit, collects the reward, then calculates
        the CTR.
    @param:
        x - context vector at time t
        action - true action for x
        t - current time step
    @return:
        ctr - cumulative take-rate
    """
    def calc_ctr(self, x, action, t):
        assert t > 0
        bandit = self.bandits[-1]    # most recently stored bandit, i.e. the one from step t-1
        pred_act = bandit.predict(x)
        # one reasonable reading of the original ### todo: count a hit when the stored
        # bandit's prediction matches the logged action and report the running hit rate
        self.cum_rewards += 1 if pred_act == action else 0
        ctr = self.cum_rewards / t
        self.ctr_history.append(ctr)
        return ctr
# In[20]:


from utils import getData, getContext, getAction

"""
train:
    Main driver function that implements LinUCB1
    and trains our multi-arm contextual bandit.
@param:
    file - data file to use (see readme for an example)
    steps - number of time steps (i.e. total observations in the data)
    alpha - exploration rate
    nArms - number of bandit arms (K in the paper)
    d - dimension of the context vector
@return:
    ctr_history - cumulative take-rate history
"""
def train(file, steps, alpha, nArms, d):
    # read in data
    data = getData(file)

    # initialize K-arm bandit
    bandit = LinUCB(alpha, d, nArms)

    # initialize bandit evaluator
    evaluator = bandit_evaluator()

    for t in range(steps):
        x = getContext(data, t)
        action = getAction(data, t)
        arm = bandit.predict(x)
        reward = bandit.get_reward(arm, action)
        bandit.arms[arm].update_arm(reward, x)
        if t > 0:
            # explore various alpha update methods to improve CTR
            bandit.arms[arm].update_alpha(method=2)
            # bandit.arms[arm].update_alpha(3, t)
        if t > 0:
            # evaluate the current bandit algorithm
            evaluator.bandits.append(bandit)
            ctr = evaluator.calc_ctr(x, action, t)
        if t > 0 and t % 100 == 0:
            print("Step:", t, end="")
            print(" | CTR: {0:.02f}%".format(ctr))

    return evaluator.ctr_history
# In[21]:


file = "classification.txt"
steps = 10000
alpha = 0.1
nArms = 10
dim = 100
ctr_history = train(file, steps, alpha, nArms, dim)


# In[23]:


# diagnostics
print(ctr_history)


# In[1]:
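The final cells only print ctr_history, so a natural follow-up diagnostic is to plot how the cumulative take-rate evolves over the 10000 steps. The snippet below is a hypothetical example of such a plot; matplotlib is assumed to be available and is not used anywhere in this commit.

# Hypothetical diagnostic: plot the CTR history returned by train().
# Assumes matplotlib is installed; nothing in the commit depends on it.
import matplotlib.pyplot as plt

def plot_ctr(ctr_history):
    plt.plot(range(1, len(ctr_history) + 1), ctr_history)
    plt.xlabel("time step t")
    plt.ylabel("cumulative take-rate")
    plt.title("LinUCB offline evaluation")
    plt.show()

# e.g. plot_ctr(ctr_history) after running the training cell above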