Commit 10422f3e by 20200318029

homework7

parent 1f2efa54
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -223,7 +223,7 @@ class bandit_evaluator(object):
"""
def __init__(self):
self.bandits = []
self.bandit = None
self.cum_rewards = 0
self.ctr_history = []
......@@ -241,14 +241,11 @@ class bandit_evaluator(object):
"""
def calc_ctr(self, x, action, t):
assert t > 0
bandit = self.bandits[-1]
pred_act = bandit.predict(x)
pred_act = self.bandit.predict(x)
### todo
if len(self.ctr_history):
hist = self.ctr_history[-1] * (t - 1)
else:
hist = 0
ctr = (hist + int(pred_act == action)) / t
if pred_act == action:
self.cum_rewards += 1
ctr = self.cum_rewards / t
self.ctr_history.append(ctr)
return ctr
......@@ -277,6 +274,7 @@ def train(file, steps, alpha, nArms, d):
bandit = LinUCB(alpha, d, nArms)
# initialize bandit evaluator
evaluator = bandit_evaluator()
evaluator.bandit = bandit
for t in range(steps):
x = getContext(data, t)
......@@ -290,7 +288,6 @@ def train(file, steps, alpha, nArms, d):
bandit.arms[arm].update_alpha(3, t)
if t > 0: # evaluate current bandit algorithm
evaluator.bandits.append(bandit)
ctr = evaluator.calc_ctr(x, action, t)
if t % 100 == 0:
print("Step:", t, end="")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment