Commit 10422f3e by 20200318029

homework7

parent 1f2efa54
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -223,7 +223,7 @@ class bandit_evaluator(object): ...@@ -223,7 +223,7 @@ class bandit_evaluator(object):
""" """
def __init__(self): def __init__(self):
self.bandits = [] self.bandit = None
self.cum_rewards = 0 self.cum_rewards = 0
self.ctr_history = [] self.ctr_history = []
...@@ -241,14 +241,11 @@ class bandit_evaluator(object): ...@@ -241,14 +241,11 @@ class bandit_evaluator(object):
""" """
def calc_ctr(self, x, action, t): def calc_ctr(self, x, action, t):
assert t > 0 assert t > 0
bandit = self.bandits[-1] pred_act = self.bandit.predict(x)
pred_act = bandit.predict(x)
### todo ### todo
if len(self.ctr_history): if pred_act == action:
hist = self.ctr_history[-1] * (t - 1) self.cum_rewards += 1
else: ctr = self.cum_rewards / t
hist = 0
ctr = (hist + int(pred_act == action)) / t
self.ctr_history.append(ctr) self.ctr_history.append(ctr)
return ctr return ctr
...@@ -277,6 +274,7 @@ def train(file, steps, alpha, nArms, d): ...@@ -277,6 +274,7 @@ def train(file, steps, alpha, nArms, d):
bandit = LinUCB(alpha, d, nArms) bandit = LinUCB(alpha, d, nArms)
# initialize bandit evaluator # initialize bandit evaluator
evaluator = bandit_evaluator() evaluator = bandit_evaluator()
evaluator.bandit = bandit
for t in range(steps): for t in range(steps):
x = getContext(data, t) x = getContext(data, t)
...@@ -290,7 +288,6 @@ def train(file, steps, alpha, nArms, d): ...@@ -290,7 +288,6 @@ def train(file, steps, alpha, nArms, d):
bandit.arms[arm].update_alpha(3, t) bandit.arms[arm].update_alpha(3, t)
if t > 0: # evaluate current bandit algorithm if t > 0: # evaluate current bandit algorithm
evaluator.bandits.append(bandit)
ctr = evaluator.calc_ctr(x, action, t) ctr = evaluator.calc_ctr(x, action, t)
if t % 100 == 0: if t % 100 == 0:
print("Step:", t, end="") print("Step:", t, end="")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment