Commit 479a5796 by 20200318029

homework7

parent d744568b
@@ -8,6 +8,7 @@ from skimage.transform import resize
from collections import deque
import os.path
import os, time
import cv2
# Define Hyperparameters
FLAGS = tf.flags.FLAGS
@@ -51,15 +52,15 @@ def atari_model():
# Normalise the inputs from [0,255] to [0,1] - to make processing easier
normalised = keras.layers.Lambda(lambda x: x/255.0, name='normalised')(frames_input)
# Conv1 is 16 8x8 filters with a stride of 4 and a ReLU
conv1 = '?'
conv1 = keras.layers.Conv2D(filters=16, kernel_size=8, strides=4, activation="relu")(normalised)
# Conv2 is 32 4x4 filters with a stride of 2 and a ReLU
conv2 = '?'
conv2 = keras.layers.Conv2D(filters=32, kernel_size=4, strides=2, activation="relu")(conv1)
# Flatten the output from Conv2
conv2_flatten = keras.layers.Flatten()(conv2)
# Then a fully connected layer with 128 ReLU units
dense1 = '?'
dense1 = keras.layers.Dense(units=128, activation="relu")(conv2_flatten)
# Then a fully connected layer with a unit to map to each of the actions and no activation
output = '?'
output = keras.layers.Dense(units=Action_size)(dense1)
# Then we multiply the output by the action mask
# When trying to find the value of all the actions this will be a mask full of 1s
# When trying to find the value of a specific action, the mask will only be 1 for a single action
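As a side note, a minimal NumPy illustration of the masking idea described above; the three-action setup and the values are made up for this sketch, not taken from the homework code:

import numpy as np
q_values = np.array([1.2, -0.3, 0.7])  # network output, one value per action
mask_all = np.ones(3)                  # mask of 1s: keep the value of every action
mask_one = np.array([0.0, 1.0, 0.0])   # one-hot mask: keep only the chosen action's value
print(q_values * mask_all)             # [ 1.2 -0.3  0.7]
print(q_values * mask_one)             # [ 0.  -0.3  0. ]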
@@ -73,7 +74,7 @@ def atari_model():
# Define optimiser
optimiser = tf.train.AdamOptimizer()
# Compile model
loss = '?' # Task-2: to choose the loss function
loss = "mse" # Task-2: to choose the loss function
model.compile(optimizer=optimiser, loss=loss)
# Return the model
return model
@@ -81,7 +82,7 @@ def atari_model():
# Create a model to use as a target
def atari_model_target():
# Task-3: to implement the code for atari_model_target
model = '?'
model = atari_model()
return model
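Since atari_model_target() simply builds a second copy of the same network, a typical way to keep it in step with the online model is a periodic hard weight copy. This is only a sketch of the usual pattern; the update schedule used by the homework's training loop is not shown in this diff:

def update_target_model(model, model_target):
    # Hard update: copy the online network's weights into the target network
    model_target.set_weights(model.get_weights())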
# get action from model using epsilon-greedy policy
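The body of get_action is cut off by the diff; for reference, a minimal sketch of a standard epsilon-greedy rule over the model's Q-values (the names q_values and action_size are assumptions, not from the file):

import numpy as np

def epsilon_greedy(q_values, epsilon, action_size):
    if np.random.rand() <= epsilon:
        return np.random.randint(action_size)  # explore: pick a random action
    return int(np.argmax(q_values))            # exploit: pick the greedy action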
@@ -116,7 +117,7 @@ def train_memory_batch(memory, model):
# Fill up our arrays with our minibatch
for id, val in enumerate(mini_batch):
state[id] = val[0]
print(val[0].shape)
# print(val[0].shape)
next_state[id] = val[3]
action.append(val[1])
reward.append(val[2])
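The rest of train_memory_batch is not shown; as a rough, self-contained sketch, these arrays typically feed a Q-learning target of the form reward + discount * max_a Q_target(next_state, a), for example (all values below are invented for illustration):

import numpy as np
discount = 0.99
reward = np.array([1.0, 0.0])                # rewards from the minibatch
done = np.array([0.0, 1.0])                  # 1.0 where the episode terminated
next_q = np.array([[0.5, 2.0], [1.5, 0.3]])  # target-network Q-values for next_state
target = reward + (1.0 - done) * discount * np.max(next_q, axis=1)
print(target)                                # [2.98 0.  ]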
@@ -209,7 +210,7 @@ def train():
# Select an action based on our current model
# Task-4: select_model = model_target or select_model = model
select_model = '?'
select_model = model
action = get_action(state_history, epsilon, global_step, select_model)
# Convert action from array numbers to real numbers
@@ -221,6 +222,8 @@ def train():
# Record output from the environment
observation, reward, done, info = env.step(real_action)
cv2.imshow("", observation)
cv2.waitKey(1)
# Process the observation
next_state = pre_processing(observation)
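pre_processing itself sits outside this diff; a plausible sketch consistent with the skimage resize import at the top of the file, where the 84x84 size and grayscale conversion are assumptions taken from the standard Atari DQN setup rather than from the homework code:

import numpy as np
from skimage.color import rgb2gray
from skimage.transform import resize

def pre_processing(observation):
    # Grayscale the RGB frame, downsample to 84x84, and store as uint8 to save replay memory
    return np.uint8(resize(rgb2gray(observation), (84, 84), mode='constant') * 255)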
......