Commit 1f2efa54 by 20200318029

homework7

parent 7066ba78
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -85,11 +85,11 @@ class UCBArm(object): ...@@ -85,11 +85,11 @@ class UCBArm(object):
""" """
def update_alpha(self, method=1, t=None):
    """Set ``self.alpha`` using one of three schedules.

    Args:
        method: Schedule selector.
            1 -- ``alpha = 1 / sqrt(t)``.
            2 -- constant ``alpha = 0.1`` (``t`` is ignored).
            3 -- ``alpha = 10 / (sqrt(t) + t)``.
            Any other value leaves ``self.alpha`` unchanged.
        t: Time step. Required and strictly positive for methods 1 and 3.

    Returns:
        None. The new value is stored on ``self.alpha``.

    Raises:
        ValueError: If ``method`` is 1 or 3 and ``t`` is ``None`` or not
            strictly positive.
    """
    if method == 2:
        self.alpha = 0.1
        return None
    if method != 1 and method != 3:
        # Unknown selector: keep the current alpha.
        return None

    # Both decaying schedules divide by an expression that is zero at t == 0
    # and undefined for negative t, so reject those inputs up front instead
    # of silently storing inf/nan (or failing inside numpy on None).
    if t is None or not t > 0:
        raise ValueError(
            "t must be a positive number for method {!r}, got {!r}".format(method, t)
        )

    root_t = np.sqrt(t)
    if method == 1:
        self.alpha = 1 / root_t
    else:
        self.alpha = 10 / (root_t + t)
    return None
...@@ -203,8 +203,6 @@ class LinUCB(object): ...@@ -203,8 +203,6 @@ class LinUCB(object):
# In[19]: # In[19]:
""" """
Class LinUCB implements unbiased offline evaluation Class LinUCB implements unbiased offline evaluation
of our multi-arm contextual bandit following of our multi-arm contextual bandit following
...@@ -243,14 +241,17 @@ class bandit_evaluator(object): ...@@ -243,14 +241,17 @@ class bandit_evaluator(object):
""" """
def calc_ctr(self, x, action, t):
    """Update and return the running fraction of matched predictions.

    The most recently appended bandit in ``self.bandits`` predicts an arm
    for ``x``; the step counts as a hit when that prediction equals
    ``action``. The last entry of ``self.ctr_history`` is multiplied by
    ``t - 1`` to recover the accumulated hit total, the new hit is added,
    and the sum is divided by ``t``.

    Args:
        x: Context passed to the bandit's ``predict``.
        action: Logged action the prediction is compared against.
        t: Step counter used as the denominator; must be positive.

    Returns:
        The updated running value, which is also appended to
        ``self.ctr_history``.

    Raises:
        ValueError: If ``t`` is not strictly positive.
    """
    # Explicit check rather than ``assert`` so it still runs under ``-O``.
    if not t > 0:
        raise ValueError("t must be positive, got {!r}".format(t))

    latest_bandit = self.bandits[-1]
    predicted = latest_bandit.predict(x)

    # With no history yet there are no accumulated hits to carry forward.
    prior_hits = self.ctr_history[-1] * (t - 1) if self.ctr_history else 0

    ctr = (prior_hits + int(predicted == action)) / t
    self.ctr_history.append(ctr)
    return ctr
# In[20]: # In[20]:
...@@ -285,8 +286,8 @@ def train(file, steps, alpha, nArms, d): ...@@ -285,8 +286,8 @@ def train(file, steps, alpha, nArms, d):
bandit.arms[arm].update_arm(reward, x) bandit.arms[arm].update_arm(reward, x)
if t > 0: # explore various alpha update methods to improve CTR if t > 0: # explore various alpha update methods to improve CTR
bandit.arms[arm].update_alpha(method=2) # or method=2 # bandit.arms[arm].update_alpha(method=2) # or method=2
#bandit.arms[arm].update_alpha(3, t) bandit.arms[arm].update_alpha(3, t)
if t > 0: # evaluate current bandit algorithm if t > 0: # evaluate current bandit algorithm
evaluator.bandits.append(bandit) evaluator.bandits.append(bandit)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment