Commit 479a5796 by 20200318029

homework7

parent d744568b
@@ -8,6 +8,7 @@ from skimage.transform import resize
from collections import deque
import os.path
import os, time
import cv2
# Define Hyperparameters
FLAGS = tf.flags.FLAGS
@@ -51,15 +52,15 @@ def atari_model():
# Normalise the inputs from [0,255] to [0,1] - to make processing easier
normalised = keras.layers.Lambda(lambda x: x/255.0, name='normalised')(frames_input)
# Conv1 is 16 8x8 filters with a stride of 4 and a ReLU
conv1 = '?'
conv1 = keras.layers.Conv2D(filters=16, kernel_size=8, strides=4, activation="relu")(normalised)
# Conv2 is 32 4x4 filters with a stride of 2 and a ReLU
conv2 = '?'
conv2 = keras.layers.Conv2D(filters=32, kernel_size=4, strides=2, activation="relu")(conv1)
# Flatten the output from Conv2
conv2_flatten = keras.layers.Flatten()(conv2)
# Then a fully connected layer with 128 ReLU units
dense1 = '?'
dense1 = keras.layers.Dense(units=128, activation="relu")(conv2_flatten)
# Then a fully connected layer with a unit to map to each of the actions and no activation
output = '?'
output = keras.layers.Dense(units=Action_size)(dense1)
# Then we multiply the output by the action mask
# When trying to find the value of all the actions this will be a mask full of 1s
# When trying to find the value of a specific action, the mask will only be 1 for a single action
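As a side note, a minimal NumPy illustration of the masking idea described above; the three-action setup and the values are made up for this sketch, not taken from the homework code:

import numpy as np
q_values = np.array([1.2, -0.3, 0.7])  # network output, one value per action
mask_all = np.ones(3)                  # mask of 1s: keep the value of every action
mask_one = np.array([0.0, 1.0, 0.0])   # one-hot mask: keep only the chosen action's value
print(q_values * mask_all)             # [ 1.2 -0.3  0.7]
print(q_values * mask_one)             # [ 0.  -0.3  0. ]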
@@ -73,7 +74,7 @@ def atari_model():
# Define optimiser
optimiser = tf.train.AdamOptimizer()
# Compile model
loss = '?' # Task-2: to choose the loss function
loss = "mse" # Task-2: to choose the loss function
model.compile(optimizer=optimiser, loss=loss)
# Return the model
return model
@@ -81,7 +82,7 @@ def atari_model():
# Create a model to use as a target
def atari_model_target():
# Task-3: to implement the code for atari_model_target
model = '?'
model = atari_model()
return model
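Since atari_model_target() simply builds a second copy of the same network, a typical way to keep it in step with the online model is a periodic hard weight copy. This is only a sketch of the usual pattern; the update schedule used by the homework's training loop is not shown in this diff:

def update_target_model(model, model_target):
    # Hard update: copy the online network's weights into the target network
    model_target.set_weights(model.get_weights())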
# get action from model using epsilon-greedy policy
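The body of get_action is cut off by the diff; for reference, a minimal sketch of a standard epsilon-greedy rule over the model's Q-values (the names q_values and action_size are assumptions, not from the file):

import numpy as np

def epsilon_greedy(q_values, epsilon, action_size):
    if np.random.rand() <= epsilon:
        return np.random.randint(action_size)  # explore: pick a random action
    return int(np.argmax(q_values))            # exploit: pick the greedy action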
@@ -116,7 +117,7 @@ def train_memory_batch(memory, model):
# Fill up our arrays with our minibatch
for id, val in enumerate(mini_batch):
state[id] = val[0]
print(val[0].shape)
# print(val[0].shape)
next_state[id] = val[3]
action.append(val[1])
reward.append(val[2])
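The rest of train_memory_batch is not shown; as a rough, self-contained sketch, these arrays typically feed a Q-learning target of the form reward + discount * max_a Q_target(next_state, a), for example (all values below are invented for illustration):

import numpy as np
discount = 0.99
reward = np.array([1.0, 0.0])                # rewards from the minibatch
done = np.array([0.0, 1.0])                  # 1.0 where the episode terminated
next_q = np.array([[0.5, 2.0], [1.5, 0.3]])  # target-network Q-values for next_state
target = reward + (1.0 - done) * discount * np.max(next_q, axis=1)
print(target)                                # [2.98 0.  ]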
@@ -209,7 +210,7 @@ def train():
# Select an action based on our current model
# Task-4: select_model = model_target or select_model = model
select_model = '?'
select_model = model
action = get_action(state_history, epsilon, global_step, select_model)
# Convert action from array numbers to real numbers
@@ -221,6 +222,8 @@ def train():
# Record output from the environment
observation, reward, done, info = env.step(real_action)
cv2.imshow("", observation)
cv2.waitKey(1)
# Process the observation
next_state = pre_processing(observation)
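pre_processing itself sits outside this diff; a plausible sketch consistent with the skimage resize import at the top of the file, where the 84x84 size and grayscale conversion are assumptions taken from the standard Atari DQN setup rather than from the homework code:

import numpy as np
from skimage.color import rgb2gray
from skimage.transform import resize

def pre_processing(observation):
    # Grayscale the RGB frame, downsample to 84x84, and store as uint8 to save replay memory
    return np.uint8(resize(rgb2gray(observation), (84, 84), mode='constant') * 255)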
......