Commit 479a5796 by 20200318029

homework7

parent d744568b
@@ -8,6 +8,7 @@ from skimage.transform import resize
 from collections import deque
 import os.path
 import os, time
+import cv2
 # Define Hyperparameters
 FLAGS = tf.flags.FLAGS
@@ -51,15 +52,15 @@ def atari_model():
     # Normalise the inputs from [0,255] to [0,1] - to make processing easier
     normalised = keras.layers.Lambda(lambda x: x/255.0, name='normalised')(frames_input)
     # Conv1 is 16 8x8 filters with a stride of 4 and a ReLU
-    conv1 = '?'
+    conv1 = keras.layers.Conv2D(filters=16, kernel_size=8, strides=4, activation="relu")(normalised)
     # Conv2 is 32 4x4 filters with a stride of 2 and a ReLU
-    conv2 = '?'
+    conv2 = keras.layers.Conv2D(filters=32, kernel_size=4, strides=2, activation="relu")(conv1)
     # Flatten the output from Conv2
     conv2_flatten = keras.layers.Flatten()(conv2)
     # Then a fully connected layer with 128 ReLU units
-    dense1 = '?'
+    dense1 = keras.layers.Dense(units=128, activation="relu")(conv2_flatten)
     # Then a fully connected layer with a unit to map to each of the actions and no activation
-    output = '?'
+    output = keras.layers.Dense(units=Action_size)(dense1)
     # Then we multiply the output by the action mask
     # When trying to find the value of all the actions this will be a mask full of 1s
     # When trying to find the value of a specific action, the mask will only be 1 for a single action
@@ -73,7 +74,7 @@ def atari_model():
     # Define optimiser
     optimiser = tf.train.AdamOptimizer()
     # Compile model
-    loss = '?' # Task-2: to choose the loss function
+    loss = "mse" # Task-2: to choose the loss function
     model.compile(optimizer=optimiser, loss=loss)
     # Return the model
     return model
@@ -81,7 +82,7 @@ def atari_model():
 # Create a model to use as a target
 def atari_model_target():
     # Task-3: to implement the code for atari_model_target
-    model = '?'
+    model = atari_model()
     return model

 # get action from model using epsilon-greedy policy
@@ -116,7 +117,7 @@ def train_memory_batch(memory, model):
     # Fill up our arrays with our minibatch
     for id, val in enumerate(mini_batch):
         state[id] = val[0]
-        print(val[0].shape)
+        # print(val[0].shape)
         next_state[id] = val[3]
         action.append(val[1])
         reward.append(val[2])
@@ -209,7 +210,7 @@ def train():
         # Select an action based on our current model
         # Task-4: select_model = model_target or select_model = model
-        select_model = '?'
+        select_model = model
         action = get_action(state_history, epsilon, global_step, select_model)
         # Convert action from array numbers to real numbers
@@ -221,6 +222,8 @@ def train():
         # Record output from the environment
         observation, reward, done, info = env.step(real_action)
+        cv2.imshow("", observation)
+        cv2.waitKey(1)
         # Process the observation
         next_state = pre_processing(observation)
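
Task-3 is answered in this commit by building the target network as a second, independent copy of the Q-network. For that copy to behave as a DQN target network, the training loop also has to sync its weights with the online model from time to time. The snippet below is only a sketch of that sync step; the names model_target and update_target_rate are assumptions and do not appear in the hunks shown above.

# Sketch only: periodically copy the online Q-network weights into the target copy,
# as standard DQN training does.
def update_target_model(model, model_target):
    # Keras models expose get_weights()/set_weights(), so the sync is a direct copy
    model_target.set_weights(model.get_weights())

# inside the training loop (hypothetical step counter and interval):
# if global_step % update_target_rate == 0:
#     update_target_model(model, model_target)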
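The body of get_action() sits outside the hunks shown, so the epsilon-greedy policy itself is not visible in this diff. A minimal sketch of what such a policy typically looks like, assuming the model takes the stacked-frame state plus an all-ones action mask (matching the mask logic in atari_model()) and that the action count is passed in explicitly:

import numpy as np

# Sketch only, not the committed implementation of get_action().
def epsilon_greedy_action(history, epsilon, model, action_size):
    # Explore: with probability epsilon pick a uniformly random action
    if np.random.rand() <= epsilon:
        return np.random.randint(action_size)
    # Exploit: query Q-values for every action by passing an all-ones mask
    q_values = model.predict([history, np.ones((1, action_size))])
    return np.argmax(q_values[0])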