Commit 1f2efa54 by 20200318029

homework7

parent 7066ba78
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -85,11 +85,11 @@ class UCBArm(object):
"""
def update_alpha(self, method=1, t=None):
    """Set ``self.alpha`` according to the selected update schedule.

    Args:
        method: Schedule selector.
            1 -- decaying: ``alpha = 1 / sqrt(t)``
            2 -- constant: ``alpha = 0.1``
            3 -- decaying: ``alpha = 10 / (sqrt(t) + t)``
            Any other value leaves ``self.alpha`` untouched.
        t: Step counter. Required by methods 1 and 3, ignored by method 2.
            NOTE(review): ``t == 0`` makes the denominator zero for
            methods 1 and 3; the visible call site only calls this
            when ``t > 0``.

    Returns:
        None. The result is stored on ``self.alpha``.

    Raises:
        ValueError: If method 1 or 3 is requested without ``t``.
    """
    if method in (1, 3) and t is None:
        # Fail with a readable message instead of an opaque numpy error.
        raise ValueError("update_alpha: t is required for method 1 and 3")

    if method == 1:
        self.alpha = 1 / np.sqrt(t)
    elif method == 2:
        self.alpha = 0.1
    elif method == 3:
        self.alpha = 10 / (np.sqrt(t) + t)
    return None
......@@ -203,8 +203,6 @@ class LinUCB(object):
# In[19]:
"""
Class LinUCB implements unbiased offline evaluation
of our multi-arm contextual bandit following
......@@ -243,14 +241,17 @@ class bandit_evaluator(object):
"""
def calc_ctr(self, x, action, t):
    """Update and return the running click-through rate at step ``t``.

    The most recently stored bandit (``self.bandits[-1]``) predicts an
    action for ``x``. The step counts as a hit when that prediction equals
    ``action``. The new CTR is the previous CTR rescaled to a hit count
    over ``t - 1`` steps, plus the current hit, divided by ``t``.

    Args:
        x: Context passed unchanged to the bandit's ``predict``.
        action: Action to compare the prediction against.
        t: 1-based step index; must be positive.

    Returns:
        The updated CTR, which is also appended to ``self.ctr_history``.

    Raises:
        AssertionError: If ``t`` is not positive.
    """
    assert t > 0
    latest_bandit = self.bandits[-1]
    pred_act = latest_bandit.predict(x)

    # Recover the cumulative hit count from the last stored average;
    # with no history yet there are no previous hits.
    if self.ctr_history:
        prior_hits = self.ctr_history[-1] * (t - 1)
    else:
        prior_hits = 0

    ctr = (prior_hits + int(pred_act == action)) / t
    self.ctr_history.append(ctr)
    return ctr
# In[20]:
......@@ -285,8 +286,8 @@ def train(file, steps, alpha, nArms, d):
bandit.arms[arm].update_arm(reward, x)
if t > 0: # explore various alpha update methods to improve CTR
bandit.arms[arm].update_alpha(method=2) # or method=2
#bandit.arms[arm].update_alpha(3, t)
# bandit.arms[arm].update_alpha(method=2) # or method=2
bandit.arms[arm].update_alpha(3, t)
if t > 0: # evaluate current bandit algorithm
evaluator.bandits.append(bandit)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment