Commit 2ec84a35 by 20200318029

homework7

parent a37d8c4c
@@ -182,9 +182,9 @@ class LinUCB(object):
     @return:
     action - true observed action for context
     """
-    def get_reward(self, arm, action):
+    def get_reward(self, arm, action, reward):
         if arm == action:
-            return 1
+            return reward
         return 0
     """
@@ -226,6 +226,7 @@ class bandit_evaluator(object):
         self.bandit = None
         self.cum_rewards = 0
         self.ctr_history = []
+        self.T = 1e-5
     """
     calc_ctr:
@@ -239,20 +240,21 @@ class bandit_evaluator(object):
     @return:
     ctr - cumulative take-rate
     """
-    def calc_ctr(self, x, action, t):
+    def calc_ctr(self, x, action, reward, t):
         assert t > 0
         pred_act = self.bandit.predict(x)
         ### todo
         if pred_act == action:
-            self.cum_rewards += 1
-        ctr = self.cum_rewards / t
+            self.cum_rewards += reward
+            self.T += 1
+        ctr = self.cum_rewards / self.T
         self.ctr_history.append(ctr)
         return ctr

 # In[20]:

-from utils import getData, getContext, getAction
+from utils import getData, getContext, getAction, getReward

 """
@@ -279,16 +281,17 @@ def train(file, steps, alpha, nArms, d):
     for t in range(steps):
         x = getContext(data, t)
         action = getAction(data, t)
+        reward = getReward(data, t)
         arm = bandit.predict(x)
-        reward = bandit.get_reward(arm, action)
-        bandit.arms[arm].update_arm(reward, x)
+        reward_ = bandit.get_reward(arm, action, reward)
+        bandit.arms[arm].update_arm(reward_, x)
         if t > 0:  # explore various alpha update methods to improve CTR
             # bandit.arms[arm].update_alpha(method=2)  # or method=2
             bandit.arms[arm].update_alpha(3, t)
         if t > 0:  # evaluate current bandit algorithm
-            ctr = evaluator.calc_ctr(x, action, t)
+            ctr = evaluator.calc_ctr(x, action, reward, t)
             if t % 100 == 0:
                 print("Step:", t, end="")
                 print(" | CTR: {0:.02f}%".format(ctr))
@@ -299,7 +302,7 @@ def train(file, steps, alpha, nArms, d):

 # In[21]:

-file = "classification.txt"
+file = "dataset.txt"
 steps = 10000
 alpha = .1
 nArms = 10
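As a sanity check of the evaluation scheme in isolation, here is a self-contained toy replay on synthetic logs (the data and the random policy are made up for illustration; the real run reads dataset.txt and uses the LinUCB arms above):

import random

random.seed(0)
nArms = 10
# Synthetic log: (action, reward) pairs standing in for dataset.txt rows.
log = [(random.randrange(nArms), random.random()) for _ in range(10000)]

cum_rewards, T = 0.0, 1e-5
for action, reward in log:
    arm = random.randrange(nArms)  # uniform-random policy in place of LinUCB
    if arm == action:              # replay: only matched steps count
        cum_rewards += reward
        T += 1

print("matched steps:", int(T), "| CTR: {0:.02f}".format(cum_rewards / T))
# A random policy matches about 1/nArms of the log, and its CTR converges to
# the mean logged reward (about 0.5 here); a learned policy should beat it.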