Commit c8510dcf by 20200318029

homework7

parent cd5aec2c
# batch size
python gmf.py --batch_size 512 --lr 0.01 --n_emb 8 --epochs 30
python gmf.py --batch_size 1024 --lr 0.01 --n_emb 8 --epochs 30
python gmf.py --batch_size 1024 --lr 0.01 --n_emb 8 --epochs 30 --validate_every 2
# learning rates
python gmf.py --batch_size 1024 --lr 0.001 --n_emb 8 --epochs 30 --validate_every 2
python gmf.py --batch_size 1024 --lr 0.005 --n_emb 8 --epochs 30 --validate_every 2
python gmf.py --batch_size 1024 --lr 0.01 --n_emb 8 --lr_scheduler --epochs 30 --validate_every 2
# embeddings
python gmf.py --batch_size 1024 --lr 0.01 --n_emb 16 --epochs 30 --validate_every 2
python gmf.py --batch_size 1024 --lr 0.01 --n_emb 32 --epochs 30 --validate_every 2
python gmf.py --batch_size 1024 --lr 0.01 --n_emb 64 --epochs 30 --validate_every 2
# batch size
python mlp.py --batch_size 512 --lr 0.01 --layers "[32, 16, 8]" --epochs 30 --validate_every 2
python mlp.py --batch_size 1024 --lr 0.01 --layers "[32, 16, 8]" --epochs 30 --validate_every 2
# learning rates
python mlp.py --batch_size 1024 --lr 0.001 --layers "[32, 16, 8]" --epochs 30 --validate_every 2
python mlp.py --batch_size 1024 --lr 0.005 --layers "[32, 16, 8]" --epochs 30 --validate_every 2
python mlp.py --batch_size 1024 --lr 0.01 --layers "[32, 16, 8]" --epochs 30 --lr_scheduler --validate_every 2
# embeddings
python mlp.py --batch_size 1024 --lr 0.01 --layers "[64, 32, 16]" --epochs 30 --validate_every 2
python mlp.py --batch_size 1024 --lr 0.01 --layers "[128, 64, 32]" --epochs 30 --validate_every 2
# higher lr and lr_scheduler
python mlp.py --batch_size 1024 --lr 0.03 --layers "[64, 32, 16]" --epochs 30 --validate_every 2
python mlp.py --batch_size 1024 --lr 0.03 --layers "[128, 64, 32]" --epochs 30 --validate_every 2
python mlp.py --batch_size 1024 --lr 0.03 --layers "[64, 32, 16]" --epochs 30 --lr_scheduler --validate_every 2
python mlp.py --batch_size 1024 --lr 0.03 --layers "[128, 64, 32]" --epochs 30 --lr_scheduler --validate_every 2
# neumf
python neumf.py --batch_size 1024 --lr 0.01 --n_emb 8 --lr_scheduler --layers "[32, 16, 8]" --dropouts "[0.,0.]" \
--mf_pretrain "GMF_bs_512_lr_001_n_emb_8_lrnr_adam_lrs_wolrs.pt" \
--mlp_pretrain "MLP_bs_512_reg_00_lr_001_n_emb_16_ll_8_dp_wodp_lrnr_adam_lrs_wolrs.pt" \
--epochs 1 --learner "SGD"
python neumf.py --batch_size 1024 --lr 0.01 --n_emb 8 --lr_scheduler --layers "[128, 64, 32]" --dropouts "[0.,0.]" \
--mf_pretrain "GMF_bs_1024_lr_001_n_emb_8_lrnr_adam_lrs_wolrs.pt" \
--mlp_pretrain "MLP_bs_1024_reg_00_lr_003_n_emb_64_ll_32_dp_wodp_lrnr_adam_lrs_wlrs.pt" \
--epochs 20 --learner "SGD" --validate_every 2
python neumf.py --batch_size 1024 --lr 0.01 --n_emb 8 --lr_scheduler --layers "[128, 64, 32]" --dropouts "[0.,0.]" \
--mf_pretrain "GMF_bs_1024_lr_001_n_emb_8_lrnr_adam_lrs_wolrs.pt" \
--mlp_pretrain "MLP_bs_1024_reg_00_lr_003_n_emb_64_ll_32_dp_wodp_lrnr_adam_lrs_wlrs.pt" \
--epochs 20 --learner "SGD" --validate_every 2
python neumf.py --batch_size 1024 --lr 0.01 --n_emb 8 --lr_scheduler --layers "[128, 64, 32]" --dropouts "[0.,0.]" \
--mf_pretrain "GMF_bs_1024_lr_001_n_emb_8_lrnr_adam_lrs_wolrs.pt" \
--mlp_pretrain "MLP_bs_1024_reg_00_lr_003_n_emb_64_ll_32_dp_wodp_lrnr_adam_lrs_wlrs.pt" \
--epochs 20 --learner "SGD" --validate_every 2
python neumf.py --batch_size 1024 --lr 0.01 --n_emb 8 --lr_scheduler --layers "[128, 64, 32]" --dropouts "[0.,0.]" \
--mf_pretrain "GMF_bs_1024_lr_001_n_emb_8_lrnr_adam_lrs_wolrs.pt" \
--mlp_pretrain "MLP_bs_1024_reg_00_lr_003_n_emb_64_ll_32_dp_wodp_lrnr_adam_lrs_wlrs.pt" \
--epochs 20 --validate_every 2
# this experiment was repeated 3 times: with and without momentum, and a 3rd time
# with MSE (result not saved)
python neumf.py --batch_size 1024 --lr 0.001 --n_emb 8 --layers "[128, 64, 32]" --dropouts "[0.,0.]" \
--mf_pretrain "GMF_bs_1024_lr_001_n_emb_8_lrnr_adam_lrs_wolrs.pt" \
--mlp_pretrain "MLP_bs_1024_reg_00_lr_003_n_emb_64_ll_32_dp_wodp_lrnr_adam_lrs_wlrs.pt" \
--freeze 1 --epochs 4 --learner "SGD"
python neumf.py --batch_size 1024 --lr 0.001 --n_emb 8 --layers "[128, 64, 32]" --dropouts "[0.,0.]" \
--mf_pretrain "GMF_bs_1024_lr_001_n_emb_8_lrnr_adam_lrs_wolrs.pt" \
--mlp_pretrain "MLP_bs_1024_reg_00_lr_003_n_emb_64_ll_32_dp_wodp_lrnr_adam_lrs_wlrs.pt" \
--freeze 1 --epochs 4 --learner "SGD"
python neumf.py --batch_size 1024 --lr 0.001 --n_emb 8 --layers "[128, 64, 32]" --dropouts "[0.,0.]" \
--mf_pretrain "GMF_bs_1024_lr_001_n_emb_8_lrnr_adam_lrs_wolrs.pt" \
--mlp_pretrain "MLP_bs_1024_reg_00_lr_003_n_emb_64_ll_32_dp_wodp_lrnr_adam_lrs_wlrs.pt" \
--freeze 1 --epochs 4 --learner "SGD"
python neumf.py --batch_size 1024 --lr 0.001 --n_emb 8 --layers "[128, 64, 32]" --dropouts "[0.,0.]" \
--mf_pretrain "GMF_bs_1024_lr_001_n_emb_8_lrnr_adam_lrs_wolrs.pt" \
--mlp_pretrain "MLP_bs_1024_reg_00_lr_003_n_emb_64_ll_32_dp_wodp_lrnr_adam_lrs_wlrs.pt" \
--freeze 1 --epochs 4
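
A minimal sketch of how a sweep like the one above could be driven from Python rather than run by hand; it assumes gmf.py accepts the flags exactly as shown and runs from the same directory (the grid values here are illustrative):

# hypothetical sweep driver for the commands above
import itertools
import subprocess

batch_sizes = [512, 1024]
lrs = [0.001, 0.005, 0.01]

for bs, lr in itertools.product(batch_sizes, lrs):
    cmd = ["python", "gmf.py",
           "--batch_size", str(bs),
           "--lr", str(lr),
           "--n_emb", "8",
           "--epochs", "30",
           "--validate_every", "2"]
    subprocess.run(cmd, check=True)  # stop the sweep on the first failure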
{
"cells": [
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import math\n",
"import random"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"\"\"\"\n",
"Class UCBArm creates the LinUCB K-arms of our contextual-bandit.\n",
"Via the LinUCB1 algorithm, our multi-arm bandit\n",
"learns to optimize the cumulative take-rate (CTR) and minimize\n",
"regret log-linearly. Through this optimization our bandit learns\n",
"in an on-line manner by sequentially updating bandit arms \n",
"based on an observed reward given a new context vector at each\n",
"time step. \n",
"\"\"\"\n",
"class UCBArm(object):\n",
" \"\"\"\n",
" Initialization: \n",
" All bandits at time step 0 are initialized to\n",
" have a DxD design matrix 'A' that is the identity\n",
" matrix and 'b' vector init to 0s. \n",
" @param:\n",
" id - unique arm id (1...K) for each arm\n",
" d - length of context vector\n",
" alpha - exploitation rate\n",
" \"\"\"\n",
" def __init__(self, id, d, alpha):\n",
" self.id = id\n",
" self.d = d\n",
" self.alpha = alpha\n",
" # Li lines 5-6\n",
" self.A = np.identity(self.d)\n",
" self.b = np.zeros((self.d,1))\n",
" \n",
" \"\"\"\n",
" getUCB: \n",
" Calculates the ucb given a context vector.\n",
" Assumes expected payoff is linear in its\n",
" d-dimensional feature vector. When considering\n",
" all the arms of the bandit this is performing\n",
" ridge regression to predict which arm should\n",
" be played given a context vector and using\n",
" on all the arms UCBs.\n",
" @param:\n",
" x - context vector (1 x d)\n",
" @return:\n",
" ucb - upper confidence bound\n",
" \"\"\"\n",
" def getUCB(self, x):\n",
" Ainv = \n",
" x = \n",
" self.thetaHat = \n",
" self.stdev = \n",
" self.ucb = \n",
" return self.ucb[0][0]\n",
" \n",
" \"\"\"\n",
" update_arm: \n",
" Updates an arm's 'A' matrix and 'b' vector\n",
" based on observed reward and context vector\n",
" @param:\n",
" reward - reward for predicted action\n",
" x - context vector (1 x d)\n",
" \"\"\"\n",
" def update_arm(self, reward, x):\n",
" x = \n",
" self.A += \n",
" self.b += \n",
" return None\n",
" \n",
" \"\"\"\n",
" update_alpha: \n",
" Used to update alpha during the training process\n",
" @param:\n",
" method - alpha update rule\n",
" t - None (default); use the time step to update alpha (ex alpha/t)\n",
" \"\"\"\n",
" def update_alpha(self, method=1, t=None):\n",
" if method == 1:\n",
" self.alpha = \n",
" elif method == 2:\n",
" self.alpha = \n",
" elif method == 3:\n",
" self.alpha = \n",
" return None\n",
" "
]
},
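{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Quick sanity check (illustrative addition, not part of the original\n",
"# assignment): one UCBArm on a toy 3-dimensional context. With A = I and\n",
"# b = 0, thetaHat = 0, so the score reduces to alpha * sqrt(x^T x).\n",
"toy = UCBArm(id=1, d=3, alpha=0.1)\n",
"x0 = np.array([1.0, 0.0, 0.0])\n",
"print(toy.getUCB(x0))  # 0.1 * 1.0 = 0.1\n",
"toy.update_arm(reward=1, x=x0)\n",
"print(toy.getUCB(x0))  # payoff estimate rises to 0.5, width shrinks\n"
]
},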
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"\"\"\"\n",
"Class LinUCB implements Li's LinUCB Algorithm [1]\n",
"for linear disjoint models for K-arm contextual\n",
"bandits. \n",
"\"\"\"\n",
"class LinUCB(object):\n",
" \"\"\"\n",
" Initialization: \n",
" Creates a bandit and init's it's K arms. \n",
" @param:\n",
" alpha - expliotation rate\n",
" d - length of context vector\n",
" n - number of arms\n",
" \n",
" arms: dictionary of UCBArms. Basically contains Da and ca\n",
" (consequentially Aa also) from the original paper\n",
" \n",
" \"\"\"\n",
" def __init__(self, alpha, d, k):\n",
" self.alpha = alpha\n",
" self.d = d #100\n",
" self.nArms = k #10\n",
" \n",
" self.arms = self.init_arms()\n",
" \n",
" \"\"\"\n",
" init_arms: \n",
" Init nArms of UCBarms\n",
" @return:\n",
" arms_dict - dictionary of arms of class UCBArm\n",
" \"\"\"\n",
" def init_arms(self):\n",
" arms_dict = {}\n",
" for id in range(1, self.nArms + 1):\n",
" arms_dict[id] = UCBArm(id, self.d, self.alpha)\n",
" return arms_dict\n",
" \n",
" \"\"\"\n",
" get_ucbs: \n",
" Calculates ucb for all arms\n",
" @param:\n",
" x - context vector\n",
" @return:\n",
" ucbs - dictionary of mappings of v: ucb, k: arm id\n",
" \"\"\"\n",
" def get_ucbs(self, x):\n",
" ucbs = {}\n",
" for arm in self.arms:\n",
" ucbs[arm] = self.arms[arm].getUCB(x)\n",
" return ucbs\n",
" \n",
" \"\"\"\n",
" choose_arm: \n",
" Returns id of arm with maximum ucb. Breaks ties\n",
" uniformly at random\n",
" @param:\n",
" ucbs - dictionary of ucbs for all arms\n",
" @return:\n",
" arm_id - id of arm with max ucb\n",
" \"\"\"\n",
" def choose_arm(self, ucbs):\n",
" max_ucb = -1\n",
" max_ucb_ids = set()\n",
" ### todo\n",
" \n",
" if len(max_ucb_ids) > 1:\n",
" return random.sample(max_ucb_ids, 1)[0]\n",
" else:\n",
" return list(max_ucb_ids)[0]\n",
" \n",
" \"\"\"\n",
" get_reward: \n",
" If predicted 'arm' equals true action\n",
" reward is 1, else 0\n",
" @param:\n",
" arm - predicted action/arm for context\n",
" @return:\n",
" action - true observed action for context\n",
" \"\"\"\n",
" def get_reward(self, arm, action):\n",
" if arm == action:\n",
" return 1\n",
" return 0\n",
" \n",
" \"\"\"\n",
" predict: \n",
" Helper function that calls the above functions \n",
" to predict an action based on a given context vector\n",
" @param:\n",
" x - context vector\n",
" @return:\n",
" pred_act - predicted action (arm id)\n",
" \"\"\"\n",
" def predict(self, x):\n",
" ucbs = self.get_ucbs(x)\n",
" pred_act = self.choose_arm(ucbs)\n",
" return pred_act"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"\"\"\"\n",
"Class LinUCB implements unbiased offline evaluation\n",
"of our multi-arm contextual bandit following\n",
"Li, Chu, et. al. [2]. At each time step (2...T)\n",
"we use our algorithm from t-1 to predict t context\n",
"vector. We evaluate our bandit's cumulative\n",
"take-rate over time. \n",
"\"\"\"\n",
"class bandit_evaluator(object):\n",
" \"\"\"\n",
" Initialization: \n",
" Creates an evaluator object to store bandit history \n",
" and calculate CTR\n",
" \n",
" bandits: list to store our trained bandit history\n",
" cum_rewards: cumulative rewards earned\n",
" ctr_history: CTR history\n",
" \n",
" \"\"\"\n",
" def __init__(self):\n",
" self.bandits = []\n",
" self.cum_rewards = 0\n",
" self.ctr_history = []\n",
" \n",
" \"\"\"\n",
" calc_ctr: \n",
" Makes prediction for new observed context at time t\n",
" using the t-1 bandit and gets rewards then calculates\n",
" CTR \n",
" @param:\n",
" x - context vector at time t\n",
" action - true action for x\n",
" t - current time step\n",
" @return:\n",
" ctr - cumulative take-rate\n",
" \"\"\"\n",
" def calc_ctr(self, x, action, t):\n",
" assert t > 0\n",
" bandit = \n",
" pred_act = \n",
" ### todo \n",
" ctr = \n",
" self.ctr_history.append(ctr)\n",
" return ctr"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"from utils import getData, getContext, getAction\n",
"\n",
"\n",
"\"\"\"\n",
"train: \n",
" Main driver function that implements LinUCB1\n",
" and trains our multi-arm contextual bandit\n",
"@param:\n",
" file - data file to use (see readme for example)\n",
" steps - number of time steps (i.e. total observations in data)\n",
" nArms - number of bandit arms (K in paper)\n",
" d - dimension of context vector\n",
"@return:\n",
" ctr_history - cumulative take-rate history\n",
"\"\"\"\n",
"def train(file, steps, alpha, nArms, d):\n",
" # read in data\n",
" data = getData(file)\n",
" # initialize K-arm bandit\n",
" bandit = LinUCB(alpha, d, nArms)\n",
" # initialize bandit evaluator \n",
" evaluator = bandit_evaluator()\n",
" \n",
" for t in range(steps): \n",
" x = getContext(data, t)\n",
" action = getAction(data, t)\n",
" arm = bandit.predict(x)\n",
" reward = bandit.get_reward(arm, action)\n",
" bandit.arms[arm].update_arm(reward, x)\n",
" \n",
" if t > 0: # explore various alpha update methods to improve CTR\n",
" bandit.arms[arm].update_alpha(method=2) # or method=2\n",
" #bandit.arms[arm].update_alpha(3, t)\n",
" \n",
" if t > 0: # evaluate current bandit algorithm\n",
" evaluator.bandits.append(bandit)\n",
" ctr = evaluator.calc_ctr(x, action, t)\n",
" if t % 100 == 0:\n",
" print(\"Step:\", t, end=\"\")\n",
" print(\" | CTR: {0:.02f}%\".format(ctr))\n",
" return evaluator.ctr_history\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Step: 100 | CTR: 0.58%\n",
"Step: 200 | CTR: 0.74%\n",
"Step: 300 | CTR: 0.81%\n",
"Step: 400 | CTR: 0.84%\n",
"Step: 500 | CTR: 0.87%\n",
"Step: 600 | CTR: 0.89%\n",
"Step: 700 | CTR: 0.90%\n",
"Step: 800 | CTR: 0.91%\n",
"Step: 900 | CTR: 0.92%\n",
"Step: 1000 | CTR: 0.93%\n",
"Step: 1100 | CTR: 0.93%\n",
"Step: 1200 | CTR: 0.94%\n",
"Step: 1300 | CTR: 0.94%\n",
"Step: 1400 | CTR: 0.94%\n",
"Step: 1500 | CTR: 0.95%\n",
"Step: 1600 | CTR: 0.95%\n",
"Step: 1700 | CTR: 0.95%\n",
"Step: 1800 | CTR: 0.96%\n",
"Step: 1900 | CTR: 0.96%\n",
"Step: 2000 | CTR: 0.96%\n",
"Step: 2100 | CTR: 0.96%\n",
"Step: 2200 | CTR: 0.96%\n",
"Step: 2300 | CTR: 0.97%\n",
"Step: 2400 | CTR: 0.97%\n",
"Step: 2500 | CTR: 0.97%\n",
"Step: 2600 | CTR: 0.97%\n",
"Step: 2700 | CTR: 0.97%\n",
"Step: 2800 | CTR: 0.97%\n",
"Step: 2900 | CTR: 0.97%\n",
"Step: 3000 | CTR: 0.97%\n",
"Step: 3100 | CTR: 0.97%\n",
"Step: 3200 | CTR: 0.98%\n",
"Step: 3300 | CTR: 0.98%\n",
"Step: 3400 | CTR: 0.98%\n",
"Step: 3500 | CTR: 0.98%\n",
"Step: 3600 | CTR: 0.98%\n",
"Step: 3700 | CTR: 0.98%\n",
"Step: 3800 | CTR: 0.98%\n",
"Step: 3900 | CTR: 0.98%\n",
"Step: 4000 | CTR: 0.98%\n",
"Step: 4100 | CTR: 0.98%\n",
"Step: 4200 | CTR: 0.98%\n",
"Step: 4300 | CTR: 0.98%\n",
"Step: 4400 | CTR: 0.98%\n",
"Step: 4500 | CTR: 0.98%\n",
"Step: 4600 | CTR: 0.98%\n",
"Step: 4700 | CTR: 0.98%\n",
"Step: 4800 | CTR: 0.98%\n",
"Step: 4900 | CTR: 0.98%\n",
"Step: 5000 | CTR: 0.98%\n",
"Step: 5100 | CTR: 0.98%\n",
"Step: 5200 | CTR: 0.98%\n",
"Step: 5300 | CTR: 0.99%\n",
"Step: 5400 | CTR: 0.99%\n",
"Step: 5500 | CTR: 0.99%\n",
"Step: 5600 | CTR: 0.99%\n",
"Step: 5700 | CTR: 0.99%\n",
"Step: 5800 | CTR: 0.99%\n",
"Step: 5900 | CTR: 0.99%\n",
"Step: 6000 | CTR: 0.99%\n",
"Step: 6100 | CTR: 0.99%\n",
"Step: 6200 | CTR: 0.99%\n",
"Step: 6300 | CTR: 0.99%\n",
"Step: 6400 | CTR: 0.99%\n",
"Step: 6500 | CTR: 0.99%\n",
"Step: 6600 | CTR: 0.99%\n",
"Step: 6700 | CTR: 0.99%\n",
"Step: 6800 | CTR: 0.99%\n",
"Step: 6900 | CTR: 0.99%\n",
"Step: 7000 | CTR: 0.99%\n",
"Step: 7100 | CTR: 0.99%\n",
"Step: 7200 | CTR: 0.99%\n",
"Step: 7300 | CTR: 0.99%\n",
"Step: 7400 | CTR: 0.99%\n",
"Step: 7500 | CTR: 0.99%\n",
"Step: 7600 | CTR: 0.99%\n",
"Step: 7700 | CTR: 0.99%\n",
"Step: 7800 | CTR: 0.99%\n",
"Step: 7900 | CTR: 0.99%\n",
"Step: 8000 | CTR: 0.99%\n",
"Step: 8100 | CTR: 0.99%\n",
"Step: 8200 | CTR: 0.99%\n",
"Step: 8300 | CTR: 0.99%\n",
"Step: 8400 | CTR: 0.99%\n",
"Step: 8500 | CTR: 0.99%\n",
"Step: 8600 | CTR: 0.99%\n",
"Step: 8700 | CTR: 0.99%\n",
"Step: 8800 | CTR: 0.99%\n",
"Step: 8900 | CTR: 0.99%\n",
"Step: 9000 | CTR: 0.99%\n",
"Step: 9100 | CTR: 0.99%\n",
"Step: 9200 | CTR: 0.99%\n",
"Step: 9300 | CTR: 0.99%\n",
"Step: 9400 | CTR: 0.99%\n",
"Step: 9500 | CTR: 0.99%\n",
"Step: 9600 | CTR: 0.99%\n",
"Step: 9700 | CTR: 0.99%\n",
"Step: 9800 | CTR: 0.99%\n",
"Step: 9900 | CTR: 0.99%\n"
]
}
],
"source": [
"file = \"classification.txt\"\n",
"steps = 10000\n",
"alpha = .1\n",
"nArms = 10\n",
"dim = 100\n",
"\n",
"ctr_history = train(file, steps, alpha, nArms, dim)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"# dianogstics\n",
"print(ctr_history)"
]
},
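{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative addition: plot the CTR curve instead of reading the raw\n",
"# list. Assumes matplotlib is available in the environment.\n",
"import matplotlib.pyplot as plt\n",
"\n",
"plt.plot(ctr_history)\n",
"plt.xlabel(\"time step\")\n",
"plt.ylabel(\"cumulative take-rate\")\n",
"plt.title(\"LinUCB offline replay CTR\")\n",
"plt.show()"
]
},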
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
# LinUCB for Disjoint Linear Models
### An implementation of the disjoint linear UCB (LinUCB) bandit algorithm for the multi-arm contextual bandit problem, with unbiased offline evaluation following Li et al. [1] and [2]. Sample data can be found at: https://goo.gl/mRPGUp
### References:
#### 1. Li, Chu, Langford, Schapire (2010). A Contextual-Bandit Approach to Personalized News Article Recommendation
#### 2. Li, Chu, Langford, Wang (2011). Unbiased Offline Evaluation of Contextual-bandit-based News Article Recommendation Algorithms
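
A minimal usage sketch for the data helpers (assuming each row of the sample data is `action reward f1 ... f100`, space-separated, matching how `utils.getData` slices columns):

```python
# Illustrative only; the file name follows the notebook's example.
from utils import getData, getContext, getAction

data = getData("classification.txt")
x0 = getContext(data, 0)   # 100-dim context vector for the first round
a0 = getAction(data, 0)    # logged action (arm id) for the first round
```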
"""
Colby Wise
LinUCB1 Multi-Arm Bandit Problem
Re-inforcement Learning
Data helper functions that return the correct
column or row from a pandas dataframe
"""
import pandas as pd
"""
Read in CSV data file and remove 'timestamp' column
@param:
file - file directory
@return:
data - pandas dataframe of action & context vectors
"""
def getData(file):
data = pd.read_csv(file, sep=" ", header=None)
data = data.loc[:,:101]
return data
"""
Helps sequentially retrieve a context vector at each time step
in the algorithm
@param:
data - pandas df
idx - time step (row) wanted
@return:
x - context vector for given time step
"""
def getContext(data, idx):
return data.loc[idx, 2:]
"""
Helps sequentially retrieve an action vector at each time step
in the algorithm
@param:
data - pandas df
idx - time step (row) wanted
@return:
action - action vector for given time step
"""
def getAction(data, idx):
return data.loc[idx, 0]
"""
Helps sequentially retrieve a reward vector at each time step
in the algorithm
@param:
data - pandas df
idx - time step (row) wanted
@return:
reward - reward vector for given time step
"""
def getReward(data, idx):
return data.loc[idx, 1]