diff --git a/.gitignore b/.gitignore index f5ff940..5b22859 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ - \.idea/ *.pyc +/experiments/* diff --git a/README.md b/README.md index 33cd429..d2b44b1 100644 --- a/README.md +++ b/README.md @@ -18,24 +18,30 @@ More detailed information on ai2thor environment can be found on their
-

A3C agent learning during training on NaturalLanguagePickUpMultipleObjectTask in one of our customized scenes and tasks with the target object being CUPS!

+

A3C agent training on NaturalLanguagePickUpMultipleObjectTask in one of our customized scenes and tasks with the target object being CUPS!

-## Overview +## Running algorithms on ai2thor This project will include implementations and adaptations of the following papers as a benchmark of the current state of the art approaches to the problem: -- [Ikostrikov's A3C](https://github.com/ikostrikov/pytorch-a3c) +- [A3C](https://arxiv.org/abs/1602.01783) [Code from Ikostrikov](https://github.com/ikostrikov/pytorch-a3c) - [Gated-Attention Architectures for Task-Oriented Language Grounding](https://arxiv.org/abs/1706.07230) --- *Original code available on [DeepRL-Grounding](https://github.com/devendrachaplot/DeepRL-Grounding)* -also based on Ikostrikov's A3C +-- A3C with gated attention (A3C_LSTM_GA) *Original code available on [DeepRL-Grounding](https://github.com/devendrachaplot/DeepRL-Grounding)* +also based on A3C made by Ikostrikov. Implementations of these can be found in the algorithms folder and a3c can be run on AI2ThorEnv with: -`python algorithms/a3c/main.py` +- `python algorithms/a3c/main.py` +- For running the A3C_LSTM_GA model with a config file set to the BowlsVsCups variant of the NaturalLanguagePickUpObjectTask in tasks.py: +`python algorithms/a3c/main.py --config-file-name NL_pickup_bowls_vs_cups_fp1_config.json --verbose-num-steps True --num-random-actions-at-init 4` +- For running [ViZDoom](https://github.com/mwydmuch/ViZDoom) (you will need to install ViZDoom) synchronously with 1 process: +`python algorithms/a3c/main.py --verbose-num-steps True --sync --env vizdoom -v 1` +- For running atari with 8 processes: +`python algorithms/a3c/main.py --env atari --num-processes 8` -Check the argparse help for more details and variations of running the algorithm with different -hyperparams and on the atari environment as well. +With A3C's `-eid` param you can specify an experiment name, which creates folders for checkpointing and hyperparameters; otherwise the experiment name defaults to the current date concatenated with a random GUID. 
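The default experiment naming mentioned above (current date plus a random GUID) mirrors the fallback implemented later in `main.py`; as a standalone sketch, with `default_experiment_id` being an illustrative helper name:

```python
import datetime
import uuid

def default_experiment_id():
    # Mirrors the fallback in main.py when no -eid is given:
    # current date concatenated with a random GUID, later used
    # as the experiment folder name under algorithms/a3c/experiments.
    return datetime.datetime.now().strftime("%Y-%m-%d-") + str(uuid.uuid4())
```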
Check the argparse help for more details and variations of running the algorithm with different +hyperparams. ## Installation @@ -74,9 +80,11 @@ for episode in range(N_EPISODES): ### Environment and Task configurations +##### JSON config files and config_dict + The environment is typically defined by a JSON configuration file located on the `gym_ai2thor/config_files` -folder. You can find an example `config_example.json` to see how to customize it. Here there is one -as well: +folder. You can find a full example at `default_config.json` to see how to customize it. Here is +another one as well: ``` # gym_ai2thor/config_files/myconfig.json @@ -86,6 +94,8 @@ as well: 'acceptable_receptacles': ['CounterTop', 'TableTop', 'Sink'], 'openable_objects': ['Microwave'], 'scene_id': 'FloorPlan28', + 'gridSize': 0.1, + 'continuous_movement': True, 'grayscale': True, 'resolution': (300, 300), 'task': {'task_name': 'PickUp', @@ -95,7 +105,11 @@ as well: For experimentation it is important to be able to make slight modifications of the environment without having to create a new config file each time. The class `AI2ThorEnv` includes the keyword argument `config_dict`, that allows to input a python dictionary **in addition to** the config file - that overrides the parameters described in the config. + that overrides the parameters described in the config. In summary, the full interface to the constructor: + + `env = AI2ThorEnv(config_file=config_file_name, config_dict=config_dict)` + +##### Tasks and TaskFactory The tasks are defined in `envs/tasks.py` and allow for particular configurations regarding the rewards given and termination conditions for an episode. You can use the tasks that we defined @@ -128,11 +142,19 @@ class MoveAheadTask(BaseTask): def reset(self): self.step_num = 0 -``` +``` + +Some tasks allow you to return extra state by filling in the get_extra_state() function (e.g. for returning a Natural Language instruction within the state). 
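The `config_dict` override behaviour described above can be sketched as a simple merge: the JSON config file provides the base values and the dictionary's keys take precedence. This is a minimal sketch under that assumption; `read_config` is a hypothetical helper, not the actual `AI2ThorEnv` internals:

```python
import json

def read_config(config_path, config_dict=None):
    # Minimal sketch of the merge implied by the README: file values
    # are loaded first, then config_dict entries override matching keys.
    with open(config_path) as f:
        config = json.load(f)
    if config_dict:
        config.update(config_dict)
    return config
```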
Again, check +tasks.py for more details. + +##### Examples and Task variants We encourage you to explore the scripts on the `examples` folder to guide you on the wrapper functionalities and explore how to create more customized versions of ai2thor environments and tasks. + + Most importantly, config files and tasks can be combined to form **Task variants**, e.g. NaturalLanguagePickUpObjectTask but only allowing + cups and bowls to be picked up, hence `gym_ai2thor/config_files/NL_pickup_bowls_vs_cups_fp1_config.json` Here is the desired result of an example task in which the goal of the agent is to place a cup in the sink. @@ -145,7 +167,7 @@ sink. ## The Team -[The M Tank](http://www.themtank.org/) is a non-partisan organisation that works solely to recognise the multifaceted +[MTank](http://www.themtank.org/) is a non-partisan organisation that works solely to recognise the multifaceted nature of Artificial Intelligence research and to highlight key developments within all sectors affected by these advancements. Through the creation of unique resources, the combination of ideas and their provision to the public, this project hopes to encourage the dialogue which is beginning to take place globally. diff --git a/algorithms/a3c/envs.py b/algorithms/a3c/env_atari.py similarity index 63% rename from algorithms/a3c/envs.py rename to algorithms/a3c/env_atari.py index 0f17edd..b0ec674 100644 --- a/algorithms/a3c/envs.py +++ b/algorithms/a3c/env_atari.py @@ -4,11 +4,17 @@ This contains auxiliary wrappers for the atari openAI gym environment e.g.
proper resizing of the input frame and a running average normalisation of said frame after resizing """ + +from __future__ import print_function + import cv2 import gym import numpy as np -from gym.spaces.box import Box +from gym import spaces +# ----------------- +# Atari preprocessing and wrappers below +# ----------------- # Taken from https://github.com/openai/universe-starter-agent def create_atari_env(env_id): @@ -18,27 +24,27 @@ def create_atari_env(env_id): return env -def _process_frame42(frame): - frame = frame[34:34 + 160, :160] - # Resize by half, then down to 42x42 (essentially mipmapping). If - # we resize directly we lose pixels that, when mapped to 42x42, - # aren't close enough to the pixel boundary. - frame = cv2.resize(frame, (80, 80)) - frame = cv2.resize(frame, (42, 42)) - frame = frame.mean(2, keepdims=True) - frame = frame.astype(np.float32) - frame *= (1.0 / 255.0) - frame = np.moveaxis(frame, -1, 0) - return frame - - class AtariRescale42x42(gym.ObservationWrapper): def __init__(self, env=None): super(AtariRescale42x42, self).__init__(env) - self.observation_space = Box(0.0, 1.0, [1, 42, 42]) + self.observation_space = spaces.Box(0.0, 1.0, [1, 42, 42]) + + @staticmethod + def _process_frame42(frame): + frame = frame[34:34 + 160, :160] + # Resize by half, then down to 42x42 (essentially mipmapping). If + # we resize directly we lose pixels that, when mapped to 42x42, + # aren't close enough to the pixel boundary. 
+ frame = cv2.resize(frame, (80, 80)) + frame = cv2.resize(frame, (42, 42)) + frame = frame.mean(2, keepdims=True) + frame = frame.astype(np.float32) + frame *= (1.0 / 255.0) + frame = np.moveaxis(frame, -1, 0) + return frame def _observation(self, observation): - return _process_frame42(observation) + return self._process_frame42(observation) class NormalizedEnv(gym.ObservationWrapper): diff --git a/algorithms/a3c/env_vizdoom.py b/algorithms/a3c/env_vizdoom.py new file mode 100644 index 0000000..c2c6b77 --- /dev/null +++ b/algorithms/a3c/env_vizdoom.py @@ -0,0 +1,396 @@ +""" +Adapted from: https://github.com/devendrachaplot/DeepRL-Grounding/blob/master/env.py + +This contains the GroundingEnv() with some additions to make it work with the ai2thor and atari +code, e.g. the small Task class; the core GroundingEnv logic was kept intact +""" + +from __future__ import print_function +from time import sleep +import collections +import codecs +import json +import warnings +import os +import random + +import numpy as np +from gym import spaces + +from algorithms.a3c.vizdoom_utils.doom import DoomObject, DoomGame, split_object, get_l2_distance, \ + get_agent_location, process_screen, spawn_object, spawn_agent, set_doom_configuration +from algorithms.a3c.vizdoom_utils.points import generate_points +from algorithms.a3c.vizdoom_utils.constants import CORRECT_OBJECT_REWARD, \ + REWARD_THRESHOLD_DISTANCE, WRONG_OBJECT_REWARD, EASY_ENV_OBJECT_X, \ + X_OFFSET, Y_OFFSET, MAP_SIZE_X, MAP_SIZE_Y, OBJECT_Y_DIST, MEDIUM_ENV_OBJECT_X_MIN, \ + MEDIUM_ENV_OBJECT_X_MAX, HARD_ENV_OBJ_DIST_THRESHOLD, MARGIN, SIZE_THRESHOLD + +actions = [[True, False, False], [False, True, False], [False, False, True]] + +ObjectLocation = collections.namedtuple("ObjectLocation", ["x", "y"]) +AgentLocation = collections.namedtuple("AgentLocation", ["x", "y", "theta"]) + + +# todo inherit from BaseTask and rename get_reward?
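The `AtariRescale42x42._process_frame42` pipeline above (crop, resize to 42x42, grayscale, scale to [0, 1], channels-first) can be sketched standalone. A naive nearest-neighbour resize stands in for the two `cv2.resize` calls here so the sketch needs only numpy; the real code resizes to 80x80 first precisely to avoid the pixel-dropping this shortcut incurs:

```python
import numpy as np

def process_frame42(frame):
    # Sketch of the wrapper's preprocessing for a (210, 160, 3) uint8
    # Atari frame. Nearest-neighbour indexing replaces cv2.resize.
    frame = frame[34:34 + 160, :160]                       # crop play area to 160x160
    idx = np.linspace(0, frame.shape[0] - 1, 42).astype(int)
    frame = frame[np.ix_(idx, idx)]                        # naive 42x42 resize
    frame = frame.mean(2, keepdims=True)                   # grayscale
    frame = frame.astype(np.float32) * (1.0 / 255.0)       # scale to [0, 1]
    return np.moveaxis(frame, -1, 0)                       # (1, 42, 42) channels-first
```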
+class Task: + # extra addition to make it work with ai2thor code + def __init__(self): + self.has_language_instructions = True + + +class GroundingEnv: + def __init__(self, args): + """Initializes the environment. + Args: + args: dictionary of parameters. Contains vizdoom specific params below: + + train_instr_file: path to training json file with instructions which is a list of objects + with keys: "instruction": "Go to the red short object", + "targets": [ + "ShortRedTorch", + "ShortRedColumn" + ], + "description": "red short object" + test_instr_file: Same as above but for testing + all_instr_file: Same as above but contains all the instructions to calculate object lists + use_train_instructions: Boolean for whether to use training instructions or test + frame_height and frame_width: resolution + scenario_path: Doom scenario file to load (default: vizdoom_maps/room.wad) + visualize: Visualize the environment + sleep: Sleep between frames for better visualization (default: 0) + living_reward: Default reward at each time step (default: 0, + change to -0.005 to encourage shorter paths) + difficulty: easy, medium or hard (explained below) + """ + self.params = args + self.curr_file_folder = os.path.dirname(os.path.realpath(__file__)) + + # Reading train and test instructions, relative converted to absolute path + self.train_instructions = self.get_instr(os.path.join(self.curr_file_folder, + self.params.train_instr_file)) + self.test_instructions = self.get_instr(os.path.join(self.curr_file_folder, + self.params.test_instr_file)) + + if self.params.use_train_instructions: + self.instructions = self.train_instructions + else: + self.instructions = self.test_instructions + + self.word_to_idx = self.get_word_to_idx() + self.objects, self.object_dict = \ + self.get_all_objects(os.path.join(self.curr_file_folder, self.params.all_instr_file)) + self.object_sizes = self.read_size_file(os.path.join(self.curr_file_folder, + self.params.object_size_file)) + self.objects_info 
= self.get_objects_info() + + # extra stuff below to make it work with ai2thor A3C + self.task = Task() + self.observation_space = spaces.Box(low=0, high=255, + shape=(3, self.params.frame_height, + self.params.frame_width), + dtype=np.uint8) + self.action_space = spaces.Discrete(3) + self.task.word_to_idx = self.word_to_idx + + def game_init(self): + """Starts the doom game engine.""" + game = DoomGame() + self.params.scenario_path = os.path.join(self.curr_file_folder, self.params.scenario_path) + game = set_doom_configuration(game, self.params) + game.init() + self.game = game + + def reset(self): + """Starts a new episode. + Returns: + state: A tuple of screen buffer state and instruction. + reward: Reward at that step. + """ + + self.game.new_episode() + self.time = 0 + + self.instruction, instruction_id = self.get_random_instruction() + + # Retrieve the possible correct objects for the instruction. + correct_objects = self.get_target_objects(instruction_id) + + # Since we fix the number of objects to 5. + self.correct_location = np.random.randint(5) + + # Randomly select one correct object from the + # list of possible correct objects for the instruction. + correct_object_id = np.random.choice(correct_objects) + chosen_correct_object = [x for x in self.objects_info if + x.name == self.objects[correct_object_id]][0] + + # Special code to handle 'largest' and 'smallest' since we need to + # compute sizes for those particular instructions. + if 'largest' not in self.instruction and 'smallest' not in self.instruction: + object_ids = random.sample([x for x in range(len(self.objects)) + if x not in correct_objects], 4) + else: + object_ids = self.get_candidate_objects_superlative_instr( + chosen_correct_object) + object_ids = [self.object_dict[x] for x in object_ids] + + assert len(object_ids) == 4 + + object_ids.insert(self.correct_location, correct_object_id) + + # Get agent and object spawn locations. 
+ agent_x_coordinate, agent_y_coordinate, \ + agent_orientation, object_x_coordinates, \ + object_y_coordinates = self.get_agent_and_object_positions() + + self.object_coordinates = [ObjectLocation(x, y) for x, y in + zip(object_x_coordinates, + object_y_coordinates)] + + # Spawn agent. + spawn_agent(self.game, agent_x_coordinate, + agent_y_coordinate, agent_orientation) + + # Spawn objects. + [spawn_object(self.game, object_id, pos_x, pos_y) for + object_id, pos_x, pos_y in + zip(object_ids, object_x_coordinates, object_y_coordinates)] + + screen = self.game.get_state().screen_buffer + screen_buf = process_screen(screen, self.params.frame_height, + self.params.frame_width) + + state = (screen_buf, self.instruction) + + print('New instruction: {}'.format(self.instruction)) + + # this function used to return a tuple of size 4, get_reward() was calculated + # but this was removed to make it the same as OpenAI gym style for maximum flexibility + return state + + def step(self, action_id): + """Executes an action in the environment to reach a new state. + Args: + action_id: An integer corresponding to the action. + Returns: + state: A tuple of screen buffer state and instruction. + reward: Reward at that step. + is_final: Flag indicating terminal state. + extra_args: Dictionary of additional arguments/parameters. + """ + # Repeat the action for 5 frames. + if self.params.visualize: + # Render 5 frames for better visualization. + for _ in range(5): + self.game.make_action(actions[action_id], 1) + # Slowing down the game for better visualization. + sleep(self.params.sleep) + else: + self.game.make_action(actions[action_id], 5) + + self.time += 1 + reward = self.get_reward() + + # End the episode if episode limit is reached or + # agent reached an object. 
+ is_final = True if self.time == self.params.max_episode_length \ + or reward != self.params.living_reward else False + + screen = self.game.get_state().screen_buffer + screen_buf = process_screen( + screen, self.params.frame_height, self.params.frame_width) + + state = (screen_buf, self.instruction) + + return state, reward, is_final, None + + def close(self): + self.game.close() + + def get_reward(self): + """Get the reward received by the agent in the last time step.""" + # If agent reached the correct object, reward = +1. + # If agent reach a wrong object, reward = -0.2. + # Otherwise, reward = living_reward. + self.agent_x, self.agent_y = get_agent_location(self.game) + target_location = self.object_coordinates[self.correct_location] + dist = get_l2_distance(self.agent_x, self.agent_y, + target_location.x, target_location.y) + if dist <= REWARD_THRESHOLD_DISTANCE: + reward = CORRECT_OBJECT_REWARD + else: + for i, object_location in enumerate(self.object_coordinates): + if i == self.correct_location: + continue + dist = get_l2_distance(self.agent_x, self.agent_y, + object_location.x, object_location.y) + if dist <= REWARD_THRESHOLD_DISTANCE: + reward = WRONG_OBJECT_REWARD + return reward + reward = self.params.living_reward + + return reward + + def get_agent_and_object_positions(self): + """ + Get agent and object positions based on the difficulty + of the environment. + """ + object_x_coordinates = [] + object_y_coordinates = [] + + # Agent location fixed in Easy and medium + agent_x_coordinate, agent_y_coordinate, agent_orientation = 128, 512, 0 + + if self.params.difficulty == 'easy': + # Candidate object locations are fixed in Easy. + object_x_coordinates = [EASY_ENV_OBJECT_X] * 5 + for i in range(-2, 3): + object_y_coordinates.append( + Y_OFFSET + MAP_SIZE_Y/2.0 + OBJECT_Y_DIST * i) + elif self.params.difficulty == 'medium': + # Generate 5 candidate object locations. 
+ for i in range(-2, 3): + object_x_coordinates.append(np.random.randint( + MEDIUM_ENV_OBJECT_X_MIN, MEDIUM_ENV_OBJECT_X_MAX)) + object_y_coordinates.append( + Y_OFFSET + MAP_SIZE_Y/2.0 + OBJECT_Y_DIST * i) + elif self.params.difficulty == 'hard': + # Generate 6 random locations: 1 for agent starting position + # and 5 for candidate objects. + random_locations = generate_points(HARD_ENV_OBJ_DIST_THRESHOLD, + MAP_SIZE_X - 2*MARGIN, + MAP_SIZE_Y - 2*MARGIN, 6) + + agent_x_coordinate = random_locations[0][0] + X_OFFSET + MARGIN + agent_y_coordinate = random_locations[0][1] + Y_OFFSET + MARGIN + agent_orientation = np.random.randint(4) + + for i in range(1, 6): + object_x_coordinates.append( + random_locations[i][0] + X_OFFSET + MARGIN) + object_y_coordinates.append( + random_locations[i][1] + Y_OFFSET + MARGIN) + + return agent_x_coordinate, agent_y_coordinate, agent_orientation, \ + object_x_coordinates, object_y_coordinates + + def get_candidate_objects_superlative_instr(self, correct_object): + """ + Get candidate objects for instructions with superlative terms + (largest and smallest). + SIZE_THRESHOLD refers to the size in terms of number of pixels so that + there is at least a minimum size difference between two objects for + instructions with superlative terms (largest and smallest) + These sizes are stored in ../data/object_sizes.txt + + instr_contains_color is True if the instruction contains the color + attribute (e.g.) "Go to the largest green object". + instr_contains_color is False if the instruction doesn't contain the + color attribute. (e.g.)
"Go to the smallest object" + """ + + instr_contains_color = False + + instruction_words = self.instruction.split() + if len(instruction_words) == 6 and \ + instruction_words[-1] == 'object': + instr_contains_color = True + + output_objects = [] + + # For instructions like "largest red object", the incorrect object + # set could contain larger objects of other color + + for obj in self.objects_info: + if instr_contains_color: + # first check color attribute + if correct_object.color != obj.color: + output_objects.append(obj) + + if instruction_words[3] == 'largest': + if correct_object.absolute_size > \ + obj.absolute_size + SIZE_THRESHOLD: + output_objects.append(obj) + + else: + if correct_object.absolute_size < \ + obj.absolute_size - SIZE_THRESHOLD: + output_objects.append(obj) + + # shuffle the objects and select the top 4 + # randomizing the objects combination + random.shuffle(output_objects) + return [x.name for x in output_objects[:4]] + + def get_objects_info(self): + # todo docstring + objects = [] + objects_map = self.objects + for obj in objects_map: + split_word = split_object(obj) + candidate_object = DoomObject(*split_word) + candidate_object.absolute_size = self.object_sizes[obj] + objects.append(candidate_object) + + return objects + + def get_all_objects(self, filename): + # todo docstring + objects = [] + object_dict = {} + count = 0 + instructions = self.get_instr(filename) + for instruction_data in instructions: + object_names = instruction_data['targets'] + for object_name in object_names: + if object_name not in objects: + objects.append(object_name) + object_dict[object_name] = count + count += 1 + + return objects, object_dict + + def get_target_objects(self, instr_id): + object_names = self.instructions[instr_id]['targets'] + correct_objects = [] + for object_name in object_names: + correct_objects.append(self.object_dict[object_name]) + + return correct_objects + + def get_instr(self, filename): + with open(filename, 'rb') as f: + 
instructions = json.load(f) + return instructions + + def read_size_file(self, filename): + with codecs.open(filename, 'r') as open_file: + lines = open_file.readlines() + + object_sizes = {} + for i, line in enumerate(lines): + split_line = line.split('\t') + if split_line[1].strip() in self.objects: + object_sizes[split_line[1].strip()] = int(split_line[2]) + + return object_sizes + + def get_random_instruction(self): + instruction_id = np.random.randint(len(self.instructions)) + instruction = self.instructions[instruction_id]['instruction'] + + return instruction, instruction_id + + def get_word_to_idx(self): + word_to_idx = {} + for instruction_data in self.train_instructions: + instruction = instruction_data['instruction'] + for word in instruction.split(" "): + if word not in word_to_idx: + word_to_idx[word] = len(word_to_idx) + return word_to_idx + + def seed(self, seed): + warnings.warn('seed() not implemented for GroundingEnv(). Only here so it does not crash' + 'and for compatibility with other Gym environments') diff --git a/algorithms/a3c/main.py b/algorithms/a3c/main.py index b9faf5e..18600fa 100644 --- a/algorithms/a3c/main.py +++ b/algorithms/a3c/main.py @@ -1,5 +1,6 @@ """ -Adapted from: https://github.com/ikostrikov/pytorch-a3c/blob/master/main.py +ActorCritic adapted from: https://github.com/ikostrikov/pytorch-a3c/blob/master/main.py +A3C_LSTM_GA adapted from: https://github.com/devendrachaplot/DeepRL-Grounding/blob/master/models.py The main file needed within a3c. Runs of the train and test functions from their respective files. Example of use: `cd algorithms/a3c` @@ -10,29 +11,45 @@ """ from __future__ import print_function - import argparse import os +# todo does this cause the writer problem? 
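`GroundingEnv.get_word_to_idx` above builds the instruction vocabulary by giving each unseen word the next free index; isolated from the class, the same logic looks like this:

```python
def build_word_to_idx(instructions):
    # Same logic as GroundingEnv.get_word_to_idx: iterate over the
    # instruction strings and assign each new word the next index.
    word_to_idx = {}
    for instruction_data in instructions:
        for word in instruction_data["instruction"].split(" "):
            if word not in word_to_idx:
                word_to_idx[word] = len(word_to_idx)
    return word_to_idx
```

The resulting mapping is what A3C_LSTM_GA uses to turn an instruction into a sequence of token indices.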
+os.environ["OMP_NUM_THREADS"] = "1" # fixes multiprocessing error on some systems +import uuid +import glob +import json +import datetime +import sys import torch import torch.multiprocessing as mp +from tensorboardX import SummaryWriter from gym_ai2thor.envs.ai2thor_env import AI2ThorEnv -from algorithms.a3c.envs import create_atari_env +from algorithms.a3c.env_atari import create_atari_env from algorithms.a3c import my_optim -from algorithms.a3c.model import ActorCritic +from algorithms.a3c.model import ActorCritic, A3C_LSTM_GA from algorithms.a3c.test import test from algorithms.a3c.train import train -# Based on -# https://github.com/pytorch/examples/tree/master/mnist_hogwild -# Training settings -parser = argparse.ArgumentParser(description='A3C') + +# Based on https://github.com/pytorch/examples/tree/master/mnist_hogwild +# Training/testing settings below +parser = argparse.ArgumentParser(description='A3C/A3C_GA') +# A3C settings parser.add_argument('--lr', type=float, default=0.0001, help='learning rate (default: 0.0001)') parser.add_argument('--gamma', type=float, default=0.99, - help='discount factor for rewards (default: 0.99)') -parser.add_argument('--tau', type=float, default=1.00, + help='discount factor for returns (default: 0.99)') +# todo find out why it is higher than gamma? +''' +https://arxiv.org/pdf/1506.02438.pdf +. On the other hand, λ < 1 +introduces bias only when the value function is inaccurate. 
Empirically, we find that the best value +of λ is much lower than the best value of +Created an issue: https://github.com/ikostrikov/pytorch-a3c/issues/60 +''' +parser.add_argument('--gae-lambda', type=float, default=1.00, help='parameter for GAE (default: 1.00)') parser.add_argument('--entropy-coef', type=float, default=0.01, help='entropy term coefficient (default: 0.01)') @@ -40,80 +57,293 @@ help='value loss coefficient (default: 0.5)') parser.add_argument('--max-grad-norm', type=float, default=50, help='value loss coefficient (default: 50)') -parser.add_argument('--seed', type=int, default=1, - help='random seed (default: 1)') -parser.add_argument('--test-sleep-time', type=int, default=200, - help='number of seconds to wait before testing again (default: 200)') -parser.add_argument('--num-processes', type=int, default=4, - help='how many training processes to use (default: 1)') -parser.add_argument('--num-steps', type=int, default=20, - help='number of forward steps in A3C (default: 20)') -parser.add_argument('--max-episode-length', type=int, default=1000, - help='maximum length of an episode (default: 1000000)') parser.add_argument('--no-shared', default=False, help='use an optimizer without shared momentum.') +parser.add_argument('--num-processes', type=int, default=4, + help='how many training processes to use (default: 4) except if synchronous') parser.add_argument('-sync', '--synchronous', dest='synchronous', action='store_true', - help='Useful for debugging purposes e.g. import pdb; pdb.set_trace(). ' + help='Useful for debugging purposes e.g. import pdb; pdb.set_trace()' 'Overwrites args.num_processes as everything is in main thread. 
' '1 train() function is run and no test()') parser.add_argument('-async', '--asynchronous', dest='synchronous', action='store_false') parser.set_defaults(synchronous=False) +# experiment, environment and logging setting +parser.add_argument('-eid', '--experiment-id', default=False, + help='random or chosen guid for folder creation for plots and checkpointing.' + ' If experiment folder and id taken, will resume training!') +parser.add_argument('--seed', type=int, default=1, + help='random seed (default: 1)') +parser.add_argument('--test-sleep-time', type=int, default=200, + help='number of seconds to wait before testing again (default: 200)') +parser.add_argument('--checkpoint-freq', type=int, default=100000, + help='number of episodes passed for resuming') +parser.add_argument('--verbose-num-steps', default=False, + help='print step number every args.num_steps') +parser.add_argument('--num-steps', type=int, default=20, + help='number of forward steps in A3C (default: 20)') + + +# Game settings below for ai2thor, atari and vizdoom. Choose between 3 with args.env +parser.add_argument('--env', type=str, default='ai2thor', + help='Choose between 3 options for the env: ai2thor, atari and vizdoom') +parser.add_argument('--max-episode-length', type=int, default=1000, + help='maximum length of an episode (default: 1000)') +# ai2thor settings +parser.add_argument('--config-file-name', default='default_config.json', + help='File must be in gym_ai2thor/config_files. Other good variants with ' + 'natural language are: NL_pickup_5_objects_surround_fp_211_v0.1.json and' + 'NL_pickup_multiple_cups_only_fp404_v0.1.json') +parser.add_argument('--resume-latest-config', default=1, + help='Whether to resume latest_config.json found in experiment folder' + 'Default is set so user cannot override settings from text file') +parser.add_argument('--num-random-actions-at-init', type=int, default=0, + help='Number of random actions the agent does on initialisation') + # Atari arguments. 
Good example of keeping code modular and allowing algorithms to run everywhere -parser.add_argument('--atari', dest='atari', action='store_true', - help='Run atari env instead with name below instead of ai2thor') parser.add_argument('--atari-render', dest='atari_render', action='store_true', help='Render atari') parser.add_argument('--atari-env-name', default='PongDeterministic-v4', help='environment to train on (default: PongDeterministic-v4)') -# parser.set_defaults(atari=False) parser.set_defaults(atari_render=False) +# VizDoom arguments +parser.add_argument('-d', '--difficulty', type=str, default="hard", + help="""Difficulty of the environment, + "easy", "medium" or "hard" (default: hard)""") +parser.add_argument('--living-reward', type=float, default=0, + help="""Default reward at each time step (default: 0, + change to -0.005 to encourage shorter paths)""") +parser.add_argument('--frame-width', type=int, default=300, + help='Frame width (default: 300)') +parser.add_argument('--frame-height', type=int, default=168, + help='Frame height (default: 168)') +parser.add_argument('-v', '--visualize', type=int, default=0, + help="""Visualize the environment (default: 0, + use 0 for faster training)""") +parser.add_argument('--sleep', type=float, default=0, + help="""Sleep between frames for better + visualization (default: 0)""") +parser.add_argument('--scenario-path', type=str, default="vizdoom_maps/room.wad", + help="""Doom scenario file to load + (default: vizdoom_maps/room.wad)""") +parser.add_argument('--interactive', type=int, default=0, + help="""Interactive mode enables human to play + (default: 0)""") +parser.add_argument('--all-instr-file', type=str, + default="vizdoom_data/instructions_all.json", + help="""All instructions file path relative to a3c folder + (default: vizdoom_data/instructions_all.json)""") +parser.add_argument('--train-instr-file', type=str, + default="vizdoom_data/instructions_train.json", + help="""Train instructions file path relative to 
a3c folder + (default: vizdoom_data/instructions_train.json)""") +parser.add_argument('--test-instr-file', type=str, + default="vizdoom_data/instructions_test.json", + help="""Test instructions file path relative to a3c folder + (default: vizdoom_data/instructions_test.json)""") +parser.add_argument('--object-size-file', type=str, + default="vizdoom_data/object_sizes.txt", + help='Object size file path relative to a3c folder ' + '(default: vizdoom_data/object_sizes.txt)') +parser.add_argument('-e', '--evaluate', type=int, default=0, + help="""0:Train, 1:Evaluate MultiTask Generalization + 2:Evaluate Zero-shot Generalization (default: 0) + async must be on and will only run test function""") if __name__ == '__main__': - os.environ['OMP_NUM_THREADS'] = '1' + # A3C was designed to be efficient for CPU. For a GPU version check batched A2C or GA3C os.environ['CUDA_VISIBLE_DEVICES'] = "" + # todo atari unbiased mean + # todo synchronous comments fix and understanding omp etc + # todo default atari not working (arg parse). why false needs to be specified? + # todo logs to file? + args = parser.parse_args() + # set to 0 so that checkpoint resume can overwrite if necessary + args.episode_number = 0 + args.total_length = 0 + args.num_backprops = 0 + """ + If no experiment id is specified we concatenate current date and random uuid. A folder will be + created in algorithms/a3c/experiments for this and tensorboard_logs, checkpoints, latest args + and config will be stored within there. The latest checkpoint can be resumed by specifying the + same experiment id again. 
+ """ + if not args.experiment_id: + args.experiment_id = datetime.datetime.now().strftime("%Y-%m-%d-") \ + + str(uuid.uuid4()) + args.experiment_path = os.path.abspath(os.path.join(os.path.dirname(__file__), + 'experiments', args.experiment_id)) + args.checkpoint_path = os.path.join(args.experiment_path, 'checkpoints') + args.tensorboard_path = os.path.join(args.experiment_path, 'tensorboard_logs') - torch.manual_seed(args.seed) - if args.atari: + # Create environment from choices of Atari, ViZDoom and our ai2thor wrapper with tasks + if args.env == 'atari': env = create_atari_env(args.atari_env_name) - args.frame_dim = 42 # fixed to be 42x42 in envs.py _process_frame42() + args.resolution = (42, 42) # fixed to be 42x42 for _process_frame42() in envs.py + elif args.env == 'vizdoom': + # many more dependencies required for VizDoom so therefore only import here + from algorithms.a3c.env_vizdoom import GroundingEnv + + if args.evaluate == 0: + args.use_train_instructions = 1 + elif args.evaluate == 1: + args.use_train_instructions = 1 + args.num_processes = 0 + elif args.evaluate == 2: + args.use_train_instructions = 0 + args.num_processes = 0 + else: + assert False, "Invalid evaluation type" + + env = GroundingEnv(args) + env.game_init() + args.resolution = (args.frame_width, args.frame_height) + elif args.env == 'ai2thor': + """if you resume a checkpoint and if args.resume_latest_config == True, changes to the file + will be overwritten by the latest_config.json file. Set to False if you want to change + config settings in the config file after resuming and in the middle of training + """ + args.last_config_resume_path = os.path.join(args.experiment_path, 'latest_config.json') + args.config_dict = {} + if args.resume_latest_config: + print('args.resume_latest_config is True. 
Attempting to resume latest config for env') + # read last_config.json if it exists, else create it below + if os.path.exists(args.last_config_resume_path): + print('Folder for this args.experiment_id "{}" and latest_config file existed' + .format(args.experiment_id)) + with open(args.last_config_resume_path, 'r') as f: + print('Loading config_dict from file: {}'.format(args.last_config_resume_path)) + print('Changes to args.config_file_name won\'t affect the env.\nSet ' + 'args.resume_latest_config to 0 to override args.config_file with params') + args.config_dict = json.load(f) + else: + print('Loading default params from args.config_file_name. ' + 'Note that these will be overridden if other config args were given') + + # random actions on reset to encourage robustness + args.config_dict['num_random_actions_at_init'] = args.num_random_actions_at_init + args.config_dict['max_episode_length'] = args.max_episode_length + # use given config file to start config and then allow config_dict to overwrite values + config_file_dir_path = os.path.abspath(os.path.join(__file__, '../../..', 'gym_ai2thor', + 'config_files')) + args.config_file_path = os.path.join(config_file_dir_path, args.config_file_name) + env = AI2ThorEnv(config_file=args.config_file_path, config_dict=args.config_dict) + args.resolution = (env.config['resolution'][0], env.config['resolution'][1]) else: - args.config_dict = {'max_episode_length': args.max_episode_length} - env = AI2ThorEnv(config_dict=args.config_dict) - args.frame_dim = env.config['resolution'][-1] - shared_model = ActorCritic(env.observation_space.shape[0], env.action_space.n, args.frame_dim) - shared_model.share_memory() + raise ValueError('args.env variable needs to be set to ai2thor, vizdoom or atari') - env.close() # above env initialisation was only to find certain params needed + # Create shared model. 
A3C uses a shared model and gradients are sent to this model + # from the models within the threads and the shared model is optimised asynchronously with these + if env.task.has_language_instructions: + # environment will return natural language sentence as part of state so process it with + # Gated Attention (GA) variant of A3C + shared_model = A3C_LSTM_GA(env.observation_space.shape[0], env.action_space.n, + args.resolution, len(env.task.word_to_idx), + args.max_episode_length) + else: + shared_model = ActorCritic(env.observation_space.shape[0], env.action_space.n, + args.resolution) + shared_model.share_memory() + # Create optimizer if args.no_shared: optimizer = None else: optimizer = my_optim.SharedAdam(shared_model.parameters(), lr=args.lr) optimizer.share_memory() - processes = [] + env.close() # above env initialisation was only to find certain params needed for models + + # Checkpoint creation/loading below + checkpoint_counter = False + if not os.path.exists(args.checkpoint_path): + print('Creating tensorboard experiment folder: {} and checkpoint folder ' + 'made here: {}'.format(args.experiment_path, args.checkpoint_path)) + os.makedirs(args.checkpoint_path) + else: + print('Using existing checkpoint folder at path: {}'.format(args.checkpoint_path)) + # Load model_best.pth.tar if it exists + checkpoint_paths = glob.glob(os.path.join(args.checkpoint_path, 'model_best*')) + if checkpoint_paths: + checkpoint_to_load = checkpoint_paths[0] + print('Loading latest checkpoint: {}'.format(checkpoint_to_load)) + + # load checkpoint and unpack values + checkpoint = torch.load(checkpoint_to_load) + try: + shared_model.load_state_dict(checkpoint['state_dict']) + optimizer.load_state_dict(checkpoint['optimizer']) + args.total_length = checkpoint['total_length'] + args.episode_number = checkpoint['episode_number'] + args.num_backprops = checkpoint.get('num_backprops', 0) + checkpoint_counter = checkpoint.get('counter', False) # if not set, set to 0 later + except 
KeyError as e: + print('Exception reading checkpoint: {}'.format(e)) + + print('Checkpoint loaded with total_length: {} and episode_number: {}'.format( + checkpoint['total_length'], checkpoint['episode_number'])) + else: + print('No model checkpoint found! Checkpoint is only saved every ' + '(args.checkpoint_freq // args.num_processes) steps on first process') - counter = mp.Value('i', 0) + # Creating TensorBoard writer and necessary folders + # writer = SummaryWriter(comment='A3C', # this will create dirs + # log_dir=args.tensorboard_path, purge_step=args.episode_number) + # # run tensorboard --logdir args.tensorboard_path in terminal and open browser e.g. + # print('-----------------\nTensorboard command:\n' + # 'tensorboard --logdir experiments/{}/tensorboard_logs' + # '\n-----------------'.format(args.experiment_id)) + # todo remove the above or make it work + # todo work out why json dump can work in non-folder and remove above + + # Save argparse arguments and environment config from last resume or first start + with open(os.path.join(args.experiment_path, 'latest_args.json'), 'w') as f: + args_dict = vars(args) + args_dict['experiment_id'] = str(args.experiment_id) + json.dump(args_dict, f) + with open(os.path.join(args.experiment_path, 'latest_sys_args.txt'), 'w') as f: + f.write(" ".join(sys.argv[:])) # to record the exact command that was run on the terminal + if args.env != 'vizdoom' and args.env != 'atari': + # Save ai2thor config + with open(args.last_config_resume_path, 'w') as f: + json.dump(env.config, f) + + # todo try fix multiprocessing again + # process initialisation and training/testing starting + processes = [] + counter = mp.Value('i', 0 if not checkpoint_counter else checkpoint_counter) lock = mp.Lock() + try: + if args.synchronous: + # synchronous so only 1 process. 
Best for debugging or just 1 thread A2C + rank = 0 + args.num_processes = 1 + writer = None + # test(args.num_processes, args, shared_model, counter) # check test functionality + train(rank, args, shared_model, counter, lock, writer, optimizer) # todo + else: + # test runs continuously and if episode ends, sleeps for args.test_sleep_time seconds + # p = mp.Process(target=test, args=(args.num_processes, args, shared_model, counter)) + # p.start() + # processes.append(p) # todo if not ai2thor? - if not args.synchronous: - # test runs continuously and if episode ends, sleeps for args.test_sleep_time seconds - p = mp.Process(target=test, args=(args.num_processes, args, shared_model, counter)) - p.start() - processes.append(p) - - for rank in range(0, args.num_processes): - p = mp.Process(target=train, args=(rank, args, shared_model, counter, lock, optimizer)) - p.start() - processes.append(p) - for p in processes: - p.join() - else: - rank = 0 - # test(args.num_processes, args, shared_model, counter) # for checking test functionality - train(rank, args, shared_model, counter, lock, optimizer) # run train on main thread + for rank in range(0, args.num_processes): + if rank == 0: + # writer_to_send = writer + writer_to_send = None + else: + writer_to_send = None + p = mp.Process(target=train, args=(rank, args, shared_model, counter, lock, + writer_to_send, optimizer)) + p.start() + processes.append(p) + for p in processes: + p.join() + finally: + pass + # writer.export_scalars_to_json(os.path.join(args.experiment_path, 'all_scalars.json')) + # writer.close() ## todo diff --git a/algorithms/a3c/model.py b/algorithms/a3c/model.py index cd5b3b7..fc4d087 100644 --- a/algorithms/a3c/model.py +++ b/algorithms/a3c/model.py @@ -1,5 +1,6 @@ """ -Adapted from: https://github.com/ikostrikov/pytorch-a3c/blob/master/model.py +ActorCritic Adapted from: https://github.com/ikostrikov/pytorch-a3c/blob/master/model.py +A3C_LSTM_GA adapted from: 
https://github.com/devendrachaplot/DeepRL-Grounding/blob/master/models.py Main A3C model which outputs predicted value, action logits and hidden state. Includes helper functions too for weight initialisation and dynamically computing LSTM/flatten input @@ -12,35 +13,20 @@ import torch.nn.functional as F -def calculate_lstm_input_size_after_4_conv_layers(frame_dim, stride=2, kernel_size=3, padding=1, - num_filters=32): - """ - Assumes square resolution image. Find LSTM size after 4 conv layers below in A3C using regular - Convolution math. For example: - 42x42 -> (42 − 3 + 2)÷ 2 + 1 = 21x21 after 1 layer - 11x11 after 2 layers -> 6x6 after 3 -> and finally 3x3 after 4 layers - Therefore lstm input size after flattening would be (3 * 3 * num_filters) - """ - - width = (frame_dim - kernel_size + 2 * padding) // stride + 1 - width = (width - kernel_size + 2 * padding) // stride + 1 - width = (width - kernel_size + 2 * padding) // stride + 1 - width = (width - kernel_size + 2 * padding) // stride + 1 - - return width * width * num_filters - def normalized_columns_initializer(weights, std=1.0): + # todo can still explain more """ Weights are normalized over their column. Also, allows control over std which is useful for initialising action logit output so that all actions have similar likelihood """ out = torch.randn(weights.size()) - out *= std / torch.sqrt(out.pow(2).sum(1, keepdim=True)) + out *= std / torch.sqrt(out.pow(2).sum(1, keepdim=True).expand_as(out)) return out def weights_init(m): + # todo add docstring on why magic numbers like 6 and name of normalisation classname = m.__class__.__name__ if classname.find('Conv') != -1: weight_shape = list(m.weight.data.size()) @@ -60,26 +46,40 @@ def weights_init(m): class ActorCritic(torch.nn.Module): """ - Mainly Ikostrikov's implementation of A3C (https://arxiv.org/abs/1602.01783). + Ikostrikov's implementation of A3C (https://arxiv.org/abs/1602.01783). 
+ +____________________________________________________________________________________________________ + + A3C policy model architecture + + Image Processing module -> Flattened output -> Policy Learning Module ---> Final output + ______________________ ___________________________________ + | _______ ____ | __ | ___________ | +image -> | 4x |conv2d| + |ELU| | |__| | | LSTM | --> Critic FC-> | -> value + | |______| |___| | -> |__| --> | --> |__________| --> Actor FC -> | -> policy logits + | | |__| | ^ ^ | -> (hx, cx) + | | |__| | | | | + |_____________________| | prev cx hx | + |__________________________________| +____________________________________________________________________________________________________ Processes an input image (with num_input_channels) with 4 conv layers, interspersed with 4 elu activation functions. The output of the final layer is then flattened and passed to an LSTM (with previous or initial hidden and cell states (hx and cx)). The new hidden state is used as an input to the critic and actor nn.Linear layer heads, - The final output is then predicted value, action logits, hx and cx. + The final output is then the predicted value, action logits, hx and cx. 
""" - def __init__(self, num_input_channels, num_outputs, frame_dim): + def __init__(self, num_input_channels, num_outputs, resolution): super(ActorCritic, self).__init__() self.conv1 = nn.Conv2d(num_input_channels, 32, 3, stride=2, padding=1) self.conv2 = nn.Conv2d(32, 32, 3, stride=2, padding=1) self.conv3 = nn.Conv2d(32, 32, 3, stride=2, padding=1) self.conv4 = nn.Conv2d(32, 32, 3, stride=2, padding=1) - # assumes square image - self.lstm_cell_size = calculate_lstm_input_size_after_4_conv_layers(frame_dim) + self.lstm_cell_size = self.calculate_lstm_input_size_for_A3C(resolution) - self.lstm = nn.LSTMCell(self.lstm_cell_size, 256) # for 128x128 input + self.lstm = nn.LSTMCell(self.lstm_cell_size, 256) self.critic_linear = nn.Linear(256, 1) self.actor_linear = nn.Linear(256, num_outputs) @@ -99,8 +99,6 @@ def __init__(self, num_input_channels, num_outputs, frame_dim): def forward(self, inputs): inputs, (hx, cx) = inputs - if len(inputs.size()) == 3: # if batch forgotten - inputs = inputs.unsqueeze(0) x = F.elu(self.conv1(inputs)) x = F.elu(self.conv2(x)) x = F.elu(self.conv3(x)) @@ -111,3 +109,202 @@ def forward(self, inputs): x = hx return self.critic_linear(x), self.actor_linear(x), (hx, cx) + + @staticmethod + def calculate_lstm_input_size_for_A3C(resolution, stride=2, kernel_size=3, padding=1, + num_filters=32): + """ + Find LSTM size after 4 conv layers below in A3C using regular + convolution math. 
For example: + 42x42 -> (42 − 3 + 2)÷ 2 + 1 = 21x21 after 1 layer + 11x11 after 2 layers -> 6x6 after 3 -> and finally 3x3 after 4 layers + Therefore lstm input size after flattening would be (3 * 3 * num_filters) + We assume that the same kernel_size, padding and stride is used in all convolutional layers + """ + width = (resolution[0] - kernel_size + 2 * padding) // stride + 1 + width = (width - kernel_size + 2 * padding) // stride + 1 + width = (width - kernel_size + 2 * padding) // stride + 1 + width = (width - kernel_size + 2 * padding) // stride + 1 + + height = (resolution[1] - kernel_size + 2 * padding) // stride + 1 + height = (height - kernel_size + 2 * padding) // stride + 1 + height = (height - kernel_size + 2 * padding) // stride + 1 + height = (height - kernel_size + 2 * padding) // stride + 1 + + return width * height * num_filters + + +class A3C_LSTM_GA(torch.nn.Module): + """ + ASCII charts on architecture, Check original paper and check original charts at: + Gated-Attention Architectures for Task-Oriented Language Grounding + https://arxiv.org/abs/1706.07230 +____________________________________________________________________________________________________ + + Figure 2. Model Architecture and state processing + + Image Processing (f_theta_image) Image Repr. + ___________________________________ __ + | _______ _______ _______ | |__| + image -> | |conv2d| -> |conv2d| -> |conv2d| | ---> |__| ---------| + | |______| |______| |______| | |__| | + |__________________________________| |__| | + | ________ _______ + Instruction Processing (f_theta_language) Instruction Repr. 
| | multi | | | + _____________________________ __ -->| modal | -> |policy| +instruction -> | ________ | |__| | |fusion | | | +word indices | |GRU RNN| | ---> |__| | |_______| |______| + | |_______| | |__| ---------| + |____________________________| |__| + +____________________________________________________________________________________________________ + + Figure 3: Gated-Attention unit architecture. + +Image Representation Gated-Attention Multi-modal Fusion unit + _______ + | ____|__ _______ + | | ____|__ ________________________ | ____|__ + |_| | ____|__ ----> | element-wise multiply | ---------> | | ____|__ to policy + |_| |conv2d| |_______________________| |_| | ____|__ ------> + |_| | |_| |conv2d| + |______| ^ |_| | + | |______| + | + | + +Instruction Repr. -> Attention vector (a_L) -> Gated-Attention filters + __ __ _______ + |__| |__| | ____|__ + |__| ----> |__| ----> | | ____|__ + |__| |__| |_| | ____|__ + |__| |__| |_| |conv2d| + |_| | + |______| + +____________________________________________________________________________________________________ + + Figure 4: A3C policy model architecture + + Flattened GA fusion output ---> Policy Learning Module ---> Final output + ____________________________________________________ + __ | _________ ___________ | + |__| | |FC layer| ---> | LSTM | --> Critic FC-> | -> value + |__| ------> | -> |________| |__________| --> Actor FC -> | -> policy logits + |__| | ^ ^ ^ | -> (hx, cx) + |__| | | | | | + | prev cx hx tx emb | + |___________________________________________________| +____________________________________________________________________________________________________ + + Very similar to the above ActorCritic but has Gated Attention (GA) and processes an instruction + which is a part of the input state using a GRU. There is also a time embedding layer to help + stabilize value prediction and only 3 conv layers compared to ActorCritic's 4 layers. + Originally ran on the ViZDoom environment. 
The above ASCII art figures and commented code + below should make the flow quite clear. + + The gated attention multi-modal fusion module enables the policy to focus on certain parts + of the input image given the instruction e.g. for the instruction "Go to the red cup", a + specific attention filter could be learned which would enable the agent to language ground + itself in the meaning of both "red", "cup" and "go to". Language grounding is the ability to + map meaning within symbols+language into real world objects and goals. + """ + + def __init__(self, num_input_channels, num_outputs, resolution, vocab_size, episode_length): + super(A3C_LSTM_GA, self).__init__() + + self.output_width, self.output_height = self.\ + calculate_input_width_height_for_A3C_LSTM_GA(resolution) + self.num_output_filters = 64 + self.lstm_cell_size = self.output_width * self.output_height * self.num_output_filters + + # Image Processing + self.conv1 = nn.Conv2d(num_input_channels, 128, kernel_size=8, stride=4) + self.conv2 = nn.Conv2d(128, 64, kernel_size=4, stride=2) + self.conv3 = nn.Conv2d(64, self.num_output_filters, kernel_size=4, stride=2) + + # Natural Language Instruction Processing + self.gru_hidden_size = 256 + self.input_size = vocab_size + self.embedding = nn.Embedding(self.input_size, 32) + self.gru = nn.GRU(32, self.gru_hidden_size) + + # Gated-Attention layers + self.attn_linear = nn.Linear(self.gru_hidden_size, self.num_output_filters) + + # Time embedding layer, helps in stabilizing value prediction e.g. 
if only 1 step is left + # in episode not much reward can be earned vs the same image state with 500 steps left + self.time_emb_dim = 32 + self.time_emb_layer = nn.Embedding(episode_length, self.time_emb_dim) + + # A3C-LSTM layers (extra self.time_emb_dim input to critic+actor) + self.linear = nn.Linear(self.lstm_cell_size, 256) + self.lstm = nn.LSTMCell(256, 256) + self.critic_linear = nn.Linear(256 + self.time_emb_dim, 1) + self.actor_linear = nn.Linear(256 + self.time_emb_dim, num_outputs) + + # Initializing weights + self.apply(weights_init) + self.actor_linear.weight.data = normalized_columns_initializer( + self.actor_linear.weight.data, 0.01) + self.actor_linear.bias.data.fill_(0) + self.critic_linear.weight.data = normalized_columns_initializer( + self.critic_linear.weight.data, 1.0) + self.critic_linear.bias.data.fill_(0) + self.lstm.bias_ih.data.fill_(0) + self.lstm.bias_hh.data.fill_(0) + + self.train() + + def forward(self, inputs): + x, input_inst, (tx, hx, cx) = inputs + + # Get the image representation + x = F.relu(self.conv1(x)) + x = F.relu(self.conv2(x)) + x_image_rep = F.relu(self.conv3(x)) + + # Get the instruction representation + encoder_hidden = torch.zeros(1, 1, self.gru_hidden_size) + for i in range(input_inst.data.size(1)): + word_embedding = self.embedding(input_inst[0, i]).view(1, 1, -1) # 1x1x32 + _, encoder_hidden = self.gru(word_embedding, encoder_hidden) + x_instr_rep = encoder_hidden.view(encoder_hidden.size(1), -1) + + # Get the attention vector from the instruction representation + x_attention = F.sigmoid(self.attn_linear(x_instr_rep)) + + # Gated-Attention expansion from vector into self.num_output_filters depth slices + # so each depth slice has the same attention value spread over the entire width and height + x_attention = x_attention.unsqueeze(2).unsqueeze(3) + x_attention = x_attention.expand(1, self.num_output_filters, self.output_height, + self.output_width) # BCHW -> 1x64xHxW + assert x_image_rep.size() == 
x_attention.size() # BCHW must be the same for element-wise + x = x_image_rep * x_attention # element-wise multiplication between attention and filters + x = x.view(x.size(0), -1) + + # A3C-LSTM + x = F.relu(self.linear(x)) + hx, cx = self.lstm(x, (hx, cx)) + time_emb = self.time_emb_layer(tx) + x = torch.cat((hx, time_emb.view(-1, self.time_emb_dim)), 1) + + return self.critic_linear(x), self.actor_linear(x), (hx, cx) + + @staticmethod + def calculate_input_width_height_for_A3C_LSTM_GA(resolution): + """ + Similar to the calculate_lstm_input_size_for_A3C function except that there are only + 3 conv layers and there is variation among the kernel_size, stride, the number of channels + and there is no padding. Therefore these are hardcoded. Check A3C_LSTM_GA class for these + numbers. Also, returns tuple representing (width, height) instead of size + """ + width = (resolution[0] - 8) // 4 + 1 + width = (width - 4) // 2 + 1 + width = (width - 4) // 2 + 1 + + height = (resolution[1] - 8) // 4 + 1 + height = (height - 4) // 2 + 1 + height = (height - 4) // 2 + 1 + + return width, height diff --git a/algorithms/a3c/my_optim.py b/algorithms/a3c/my_optim.py index dfdc529..863d6bd 100644 --- a/algorithms/a3c/my_optim.py +++ b/algorithms/a3c/my_optim.py @@ -2,10 +2,12 @@ Adapted from: https://github.com/ikostrikov/pytorch-a3c/blob/master/my_optim.py In the original A3C paper (https://arxiv.org/abs/1602.01783), the authors compared 3 different -optimizers i.e. Momentum SGD, RMSProp and Shared RMSProp (check final part of section 4). The -difference between the 3rd compared to the 2nd is whether to compute shared statistics across all -threads, which was found to be more robust. It seems the equivalent was implemented for Adam -below. +optimizers i.e. Momentum SGD, RMSProp and Shared RMSProp (check final part of section 4 and +appendix). 
The difference between Shared RMSProp and an approach that uses a separate +RMSProp on each thread is whether to compute shared statistics g across all threads/processes and +update them asynchronously. This was found to be more robust. It seems the equivalent was +implemented for Adam below. +# todo add formula """ import math @@ -74,8 +76,7 @@ def step(self, closure=None): bias_correction1 = 1 - beta1 ** state['step'].item() bias_correction2 = 1 - beta2 ** state['step'].item() - step_size = group['lr'] * math.sqrt( - bias_correction2) / bias_correction1 + step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1 p.data.addcdiv_(-step_size, exp_avg, denom) diff --git a/algorithms/a3c/test.py b/algorithms/a3c/test.py index d96b661..19be7d3 100644 --- a/algorithms/a3c/test.py +++ b/algorithms/a3c/test.py @@ -9,36 +9,49 @@ import time from collections import deque +import numpy as np import torch import torch.nn.functional as F from gym_ai2thor.envs.ai2thor_env import AI2ThorEnv -from algorithms.a3c.envs import create_atari_env -from algorithms.a3c.model import ActorCritic +from gym_ai2thor.task_utils import unpack_state +from algorithms.a3c.env_atari import create_atari_env +from algorithms.a3c.model import ActorCritic, A3C_LSTM_GA def test(rank, args, shared_model, counter): torch.manual_seed(args.seed + rank) - if args.atari: + if args.env == 'atari': env = create_atari_env(args.atari_env_name) - else: - args.config_dict = {'max_episode_length': args.max_episode_length} - env = AI2ThorEnv(config_dict=args.config_dict) + elif args.env == 'vizdoom': + # many more dependencies required for VizDoom + from algorithms.a3c.env_vizdoom import GroundingEnv + + env = GroundingEnv(args) + env.game_init() + elif args.env == 'ai2thor': + env = AI2ThorEnv(config_file=args.config_file_path, config_dict=args.config_dict) env.seed(args.seed + rank) - model = ActorCritic(env.observation_space.shape[0], env.action_space.n, args.frame_dim) + if 
env.task.has_language_instructions: + model = A3C_LSTM_GA(env.observation_space.shape[0], env.action_space.n, + args.resolution, len(env.task.word_to_idx), args.max_episode_length) + else: + model = ActorCritic(env.observation_space.shape[0], env.action_space.n, args.resolution) model.eval() + # instruction_indices is None if task doesn't require language instructions state = env.reset() - state = torch.from_numpy(state) - reward_sum = 0 + image_state, instruction_indices = unpack_state(state, env) + done = True start_time = time.time() + reward_sum = 0 - # a quick hack to prevent the agent from stucking + # a quick hack to prevent the agent from getting stuck actions = deque(maxlen=100) episode_length = 0 while True: @@ -55,7 +68,13 @@ def test(rank, args, shared_model, counter): hx = hx.detach() with torch.no_grad(): - value, logit, (hx, cx) = model((state.unsqueeze(0).float(), (hx, cx))) + if not env.task.has_language_instructions: + value, logit, (hx, cx) = model((image_state.unsqueeze(0).float(), (hx, cx))) + else: + tx = torch.from_numpy(np.array([episode_length])).long() + value, logit, (hx, cx) = model((image_state.unsqueeze(0).float(), + instruction_indices.long(), + (tx, hx, cx))) prob = F.softmax(logit, dim=-1) action = prob.max(1, keepdim=True)[1].numpy() @@ -64,7 +83,7 @@ def test(rank, args, shared_model, counter): reward_sum += reward # a quick hack to prevent the agent from stucking - # i.e. in test mode an agent can repeat an action ad infinitum + # i.e. in test mode an agent can repeat an action ad infinitum and we avoid this actions.append(action[0, 0]) if actions.count(actions[0]) == actions.maxlen: print('In test. 
Episode over because agent repeated action {} times'.format( @@ -72,15 +91,16 @@ def test(rank, args, shared_model, counter): done = True if done: - print("Time {}, num steps over all threads {}, FPS {:.0f}, episode reward {}, episode length {}".format( - time.strftime("%Hh %Mm %Ss", - time.gmtime(time.time() - start_time)), - counter.value, counter.value / (time.time() - start_time), - reward_sum, episode_length)) + print("In test. Time {}, num steps over all threads {}, FPS {:.0f}, episode reward " + "{:.4f}, episode length {}".format(time.strftime("%Hh %Mm %Ss", + time.gmtime(time.time() - start_time)), + counter.value, counter.value / (time.time() - start_time), + reward_sum, episode_length)) reward_sum = 0 episode_length = 0 actions.clear() state = env.reset() + time.sleep(args.test_sleep_time) - state = torch.from_numpy(state) + image_state, instruction_indices = unpack_state(state, env) diff --git a/algorithms/a3c/train.py b/algorithms/a3c/train.py index ba77d80..2ece0ec 100644 --- a/algorithms/a3c/train.py +++ b/algorithms/a3c/train.py @@ -11,54 +11,106 @@ Generalized Advantage Estimation (GAE) with the entropy loss added onto policy loss to encourage exploration. Once these losses have been calculated, we add them all together, backprop to find all gradients and then optimise with Adam and we go back to the start of the main training loop. + +if natural_language is set to True, environment returns sentence instruction with image as state. +A3C_LSTM_GA model is used instead. 
""" +import time +import os +import shutil + +import numpy as np import torch import torch.nn.functional as F import torch.optim as optim +from tensorboardX import SummaryWriter from gym_ai2thor.envs.ai2thor_env import AI2ThorEnv -from algorithms.a3c.envs import create_atari_env -from algorithms.a3c.model import ActorCritic +from algorithms.a3c.env_atari import create_atari_env +from algorithms.a3c.model import ActorCritic, A3C_LSTM_GA +from gym_ai2thor.task_utils import unpack_state + +def save_checkpoint(save_object, checkpoint_path): + fp = os.path.join(checkpoint_path, 'model_best.pth.tar') + print('Saving best model to path: {}'.format(fp)) + torch.save(save_object, fp) def ensure_shared_grads(model, shared_model): + """ + In DeepMind's paper they say they perform async updates without locks to maximise throughput + Check these issues for more information: + https://github.com/ikostrikov/pytorch-a3c/issues/25 + https://discuss.pytorch.org/t/problem-on-variable-grad-data/957/15 + """ for param, shared_param in zip(model.parameters(), shared_model.parameters()): if shared_param.grad is not None: - return - shared_param._grad = param.grad + return # todo see if this ever happens. And also see how it works under concurrency + shared_param._grad = param.grad # _grad is writable +def train(rank, args, shared_model, counter, lock, writer, optimizer=None): + """ + Main A3C or A3C_LSTM_GA initialisation and train loop + """ + train_start_time = time.time() + + if rank == 0: + # todo try max_queue param to see if it increases how long it takes before blocking + # todo find exactly when it blocks. See if omp is the issue + writer = SummaryWriter(comment='A3C', # this will create dirs + log_dir=args.tensorboard_path, purge_step=args.episode_number) + # run tensorboardX --logs_dir args.tensorboard_path in terminal and open browser e.g. + print('Rank == 0. 
Writer created in this process') + print('-----------------\nTensorboard command (from root):\n' + 'tensorboard --logdir algorithms/a3c/experiments/{}/tensorboard_logs' + '\n-----------------'.format(args.experiment_id)) -def train(rank, args, shared_model, counter, lock, optimizer=None): torch.manual_seed(args.seed + rank) - if args.atari: + if args.env == 'atari': env = create_atari_env(args.atari_env_name) - else: - args.config_dict = {'max_episode_length': args.max_episode_length} - env = AI2ThorEnv(config_dict=args.config_dict) - env.seed(args.seed + rank) + elif args.env == 'vizdoom': + # many more dependencies required for VizDoom + from algorithms.a3c.env_vizdoom import GroundingEnv - model = ActorCritic(env.observation_space.shape[0], env.action_space.n, args.frame_dim) + env = GroundingEnv(args) + env.game_init() + elif args.env == 'ai2thor': + env = AI2ThorEnv(config_file=args.config_file_path, config_dict=args.config_dict) + env.seed(args.seed + rank) # todo actually make it work? 
+ + if env.task.has_language_instructions: + model = A3C_LSTM_GA(env.observation_space.shape[0], env.action_space.n, + args.resolution, len(env.task.word_to_idx), args.max_episode_length) + else: + model = ActorCritic(env.observation_space.shape[0], env.action_space.n, args.resolution) if optimizer is None: optimizer = optim.Adam(shared_model.parameters(), lr=args.lr) model.train() + # instruction_indices is None if task doesn't require language instructions state = env.reset() - state = torch.from_numpy(state) + image_state, instruction_indices = unpack_state(state, env) done = True - # monitoring - total_reward_for_num_steps_list = [] + # monitoring and logging variables + avg_over_num_episodes = 10 + avg_episode_returns = [] + avg_episode_return, best_avg_episode_return = -np.inf, -np.inf episode_total_rewards_list = [] all_rewards_in_episode = [] - avg_reward_for_num_steps_list = [] - - total_length = 0 + p_losses = [] + v_losses = [] + total_length = args.total_length + episode_number = args.episode_number episode_length = 0 + num_backprops = args.num_backprops + + # main infinite loop while True: # Sync with the shared model model.load_state_dict(shared_model.state_dict()) @@ -66,6 +118,8 @@ def train(rank, args, shared_model, counter, lock, optimizer=None): cx = torch.zeros(1, 256) hx = torch.zeros(1, 256) else: + # Detach hidden states to make sure we don't backprop through more + # than args.num_steps hidden states cx = cx.detach() hx = hx.detach() @@ -74,80 +128,157 @@ def train(rank, args, shared_model, counter, lock, optimizer=None): rewards = [] entropies = [] + interaction_start_time = time.time() + # interact with environment for args.num_steps to get log_probs+entropies+rewards+values for step in range(args.num_steps): - episode_length += 1 - total_length += 1 - value, logit, (hx, cx) = model((state.unsqueeze(0).float(), (hx, cx))) + # save model every args.checkpoint_freq + if rank == 0 and total_length > 0 and total_length % (args.checkpoint_freq 
// + args.num_processes) == 0: + checkpoint_dict = { + 'total_length': total_length, + 'episode_number': episode_number, + 'num_backprops': num_backprops, + 'counter': counter.value, + 'state_dict': model.state_dict(), + 'optimizer': optimizer.state_dict() + } + if avg_episode_return > best_avg_episode_return: + save_checkpoint(checkpoint_dict, args.checkpoint_path) + best_avg_episode_return = avg_episode_return + + # Run model to get predicted value, action logits and LSTM hidden+cell state + if not env.task.has_language_instructions: + value, logit, (hx, cx) = model((image_state.unsqueeze(0).float(), (hx, cx))) + else: + # Time embedding from integer to stabilise value prediction and instruction indices + tx = torch.from_numpy(np.array([episode_length])).long() + value, logit, (hx, cx) = model((image_state.unsqueeze(0).float(), + instruction_indices.long(), + (tx, hx, cx))) + # Calculate probabilities from action logit, all log probabilities and entropy prob = F.softmax(logit, dim=-1) log_prob = F.log_softmax(logit, dim=-1) entropy = -(log_prob * prob).sum(1, keepdim=True) - entropies.append(entropy) + # sample action, get log_prob for specific action and action int for environment step action = prob.multinomial(num_samples=1).detach() log_prob = log_prob.gather(1, action) - action_int = action.numpy()[0][0].item() + # step and unpack state. Store value, log_prob, reward and entropy below state, reward, done, _ = env.step(action_int) + image_state, instruction_indices = unpack_state(state, env) + episode_length += 1 + total_length += 1 done = done or episode_length >= args.max_episode_length with lock: + # increment global step counter variable across processes counter.value += 1 - if done: - episode_length = 0 - total_length -= 1 - total_reward_for_episode = sum(all_rewards_in_episode) - episode_total_rewards_list.append(total_reward_for_episode) - all_rewards_in_episode = [] - state = env.reset() - print('Episode Over. Total Length: {}. 
Total reward for episode: {}'.format(
-                    total_length, total_reward_for_episode))
-                print('Step no: {}. total length: {}'.format(episode_length, total_length))
-
-            state = torch.from_numpy(state)
+            # logging, saving stats, benchmarking and resetting
             values.append(value)
             log_probs.append(log_prob)
             rewards.append(reward)
+            entropies.append(entropy)
             all_rewards_in_episode.append(reward)
             if done:
-                break
+                total_reward_for_episode = sum(all_rewards_in_episode)
+                all_rewards_in_episode = []
+                episode_total_rewards_list.append(total_reward_for_episode)
+                # only calculate after a set number of episodes have passed
+                if len(episode_total_rewards_list) > avg_over_num_episodes:
+                    avg_episode_return = sum(episode_total_rewards_list[-avg_over_num_episodes:]) \
+                                         / len(episode_total_rewards_list[-avg_over_num_episodes:])
+                    avg_episode_returns.append(avg_episode_return)
+                    # writer.add_scalar('avg_episode_returns', avg_episode_return, episode_number)
+
+                # todo print average episode return
+                print('Rank: {}. Episode {} Over. Total Length: {}. Total reward for episode: '
+                      '{:.4f}'.format(rank, episode_number, total_length, total_reward_for_episode))
+                print('Rank: {}. Step no: {}. total length: {}'.format(rank, episode_length,
+                                                                       total_length))
+                print('Rank: {}. Total Length: {}. Counter across all processes: {}. '
+                      'Total reward for episode: {:.4f}'.format(rank, total_length, counter.value,
+                                                                total_reward_for_episode))
+                if rank == 0:
+                    pass
+                    # writer.add_scalar('episode_lengths', episode_length, episode_number)
+                    # writer.add_scalar('episode_total_rewards', total_reward_for_episode,
+                    #                   episode_number)
-            # No interaction with environment below. 
- # Monitoring - total_reward_for_num_steps = sum(rewards) - total_reward_for_num_steps_list.append(total_reward_for_num_steps) - avg_reward_for_num_steps = total_reward_for_num_steps / len(rewards) - avg_reward_for_num_steps_list.append(avg_reward_for_num_steps) + # instruction_indices is None if task doesn't require language instructions + state = env.reset() + image_state, instruction_indices = unpack_state(state, env) - # Backprop and optimisation - R = torch.zeros(1, 1) - if not done: # to change last reward to predicted value to .... - value, _, _ = model((state.unsqueeze(0).float(), (hx, cx))) - R = value.detach() + episode_number += 1 + episode_length = 0 + break - values.append(R) + # Calculation of returns, advantages, value_loss, GAE, policy_loss. + # Then backprop and optimisation. No interaction with environment below. + curr_return = torch.zeros(1, 1) + # change current return to predicted value if episode wasn't terminal + if not done: + if not env.task.has_language_instructions: + value, _, _ = model((image_state.unsqueeze(0).float(), (hx, cx))) + else: + value, _, _ = model((image_state.unsqueeze(0).float(), + instruction_indices.long(), + (tx, hx, cx))) + curr_return = value.detach() + + # if episode is terminal, curr_return is 0. 
Otherwise, predicted value is curr_return
+        # This is because V(s_t+1) must exist as values[i + 1] for the last TD residual below,
+        # and to estimate GAE online where args.num_steps is much less than episode length
+        values.append(curr_return)
         policy_loss = 0
         value_loss = 0
-        # import pdb;pdb.set_trace()  # good place to breakpoint to see training cycle
         gae = torch.zeros(1, 1)
         for i in reversed(range(len(rewards))):
-            R = args.gamma * R + rewards[i]
-            advantage = R - values[i]
+            # Calculate returns, advantages and value_loss
+            curr_return = args.gamma * curr_return + rewards[i]
+            advantage = curr_return - values[i]
             value_loss = value_loss + 0.5 * advantage.pow(2)
-            # Generalized Advantage Estimation
+            # Generalized Advantage Estimation (GAE). TD residual: delta_t = r_t + 𝛾 * V_t+1 - V_t
             delta_t = rewards[i] + args.gamma * values[i + 1] - values[i]
-            gae = gae * args.gamma * args.tau + delta_t
+            gae = gae * args.gamma * args.gae_lambda + delta_t
+            # log probability of action multiplied with GAE, with entropy to encourage exploration
             policy_loss = policy_loss - log_probs[i] * gae.detach() - \
-                args.entropy_coef * entropies[i]
+                args.entropy_coef * entropies[i]  # negative because SGD, not SGA
+        # Backprop, gradient norm clipping and optimisation
         optimizer.zero_grad()
-
         (policy_loss + args.value_loss_coef * value_loss).backward()
         torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)
-
         ensure_shared_grads(model, shared_model)
         optimizer.step()
+
+        # benchmarking and general info
+        num_backprops += 1
+        # if rank == 0:
+        #     writer.add_scalar('policy_loss', policy_loss.item(), num_backprops)
+        #     writer.add_scalar('value_loss', value_loss.item(), num_backprops)
+        p_losses.append(policy_loss.item())
+        v_losses.append(value_loss.item())
+
+        if len(p_losses) > 1000:  # 1000 * 20 (args.num_steps default) = every 20000 steps
+            print(" ".join([
+                "Training thread: {}".format(rank),
+                "Num backprops: {}".format(num_backprops),
+                "Avg policy loss: 
{}".format(np.mean(p_losses)), + "Avg value loss: {}".format(np.mean(v_losses))])) + p_losses = [] + v_losses = [] + + if rank == 0 and args.verbose_num_steps: + print('Step no: {}. total length: {}. Time elapsed: {}m'.format( # todo time elapsed would be nice outside verbose + episode_length, + total_length, + round((time.time() - train_start_time) / 60.0, 3) + )) + + if num_backprops % 100 == 0: + print('Time taken for args.steps ({}): {}'.format(args.num_steps, + round(time.time() - interaction_start_time, 3))) diff --git a/algorithms/a3c/vizdoom_data/instructions_all.json b/algorithms/a3c/vizdoom_data/instructions_all.json new file mode 100644 index 0000000..6cd5bbd --- /dev/null +++ b/algorithms/a3c/vizdoom_data/instructions_all.json @@ -0,0 +1,595 @@ +[ + { + "instruction": "Go to the blue torch", + "targets": [ + "ShortBlueTorch", + "BlueTorch" + ], + "description": "blue torch" + }, + { + "instruction": "Go to the short red object", + "targets": [ + "ShortRedTorch", + "ShortRedColumn" + ], + "description": "short red object" + }, + { + "instruction": "Go to the red short object", + "targets": [ + "ShortRedTorch", + "ShortRedColumn" + ], + "description": "red short object" + }, + { + "instruction": "Go to the smallest yellow object", + "targets": [ + "YellowCard", + "YellowSkull" + ], + "description": "smallest yellow object" + }, + { + "instruction": "Go to the red torch", + "targets": [ + "RedTorch", + "ShortRedTorch" + ], + "description": "red torch" + }, + { + "instruction": "Go to the smallest blue object", + "targets": [ + "ShortBlueTorch", + "BlueTorch", + "BlueCard", + "BlueSkull" + ], + "description": "smallest blue object" + }, + { + "instruction": "Go to the green torch", + "targets": [ + "ShortGreenTorch", + "GreenTorch" + ], + "description": "green torch" + }, + { + "instruction": "Go to the tall green torch", + "targets": [ + "GreenTorch" + ], + "description": "tall green torch" + }, + { + "instruction": "Go to the green tall torch", + 
"targets": [ + "GreenTorch" + ], + "description": "green tall torch" + }, + { + "instruction": "Go to the smallest green object", + "targets": [ + "GreenArmor", + "TallGreenColumn", + "ShortGreenTorch", + "ShortGreenColumn", + "GreenTorch" + ], + "description": "smallest green object" + }, + { + "instruction": "Go to the short torch", + "targets": [ + "ShortRedTorch", + "ShortGreenTorch", + "ShortBlueTorch" + ], + "description": "short torch" + }, + { + "instruction": "Go to the tall torch", + "targets": [ + "RedTorch", + "GreenTorch", + "BlueTorch" + ], + "description": "tall torch" + }, + { + "instruction": "Go to the short red pillar", + "targets": [ + "ShortRedColumn" + ], + "description": "short red pillar" + }, + { + "instruction": "Go to the red short pillar", + "targets": [ + "ShortRedColumn" + ], + "description": "red short pillar" + }, + { + "instruction": "Go to the short red torch", + "targets": [ + "ShortRedTorch" + ], + "description": "short red torch" + }, + { + "instruction": "Go to the red short torch", + "targets": [ + "ShortRedTorch" + ], + "description": "red short torch" + }, + { + "instruction": "Go to the red tall object", + "targets": [ + "TallRedColumn", + "RedTorch" + ], + "description": "red tall object" + }, + { + "instruction": "Go to the tall red object", + "targets": [ + "TallRedColumn", + "RedTorch" + ], + "description": "tall red object" + }, + { + "instruction": "Go to the largest blue object", + "targets": [ + "ShortBlueTorch", + "BlueTorch", + "BlueCard", + "BlueSkull" + ], + "description": "largest blue object" + }, + { + "instruction": "Go to the yellow object", + "targets": [ + "YellowCard", + "YellowSkull" + ], + "description": "yellow object" + }, + { + "instruction": "Go to the green armor", + "targets": [ + "GreenArmor" + ], + "description": "green armor" + }, + { + "instruction": "Go to the tall object", + "targets": [ + "TallRedColumn", + "TallGreenColumn", + "BlueTorch", + "RedTorch", + "GreenTorch" + ], + 
"description": "tall object" + }, + { + "instruction": "Go to the red skullkey", + "targets": [ + "RedSkull" + ], + "description": "red skullkey" + }, + { + "instruction": "Go to the red object", + "targets": [ + "ShortRedTorch", + "ShortRedColumn", + "TallRedColumn", + "RedTorch", + "RedSkull", + "RedCard", + "BlueArmor" + ], + "description": "red object" + }, + { + "instruction": "Go to the green object", + "targets": [ + "GreenArmor", + "TallGreenColumn", + "ShortGreenTorch", + "ShortGreenColumn", + "GreenTorch" + ], + "description": "green object" + }, + { + "instruction": "Go to the blue object", + "targets": [ + "ShortBlueTorch", + "BlueTorch", + "BlueCard", + "BlueSkull" + ], + "description": "blue object" + }, + { + "instruction": "Go to the tall green object", + "targets": [ + "TallGreenColumn", + "GreenTorch" + ], + "description": "tall green object" + }, + { + "instruction": "Go to the green tall object", + "targets": [ + "TallGreenColumn", + "GreenTorch" + ], + "description": "green tall object" + }, + { + "instruction": "Go to the tall blue object", + "targets": [ + "BlueTorch" + ], + "description": "tall blue object" + }, + { + "instruction": "Go to the blue tall object", + "targets": [ + "BlueTorch" + ], + "description": "blue tall object" + }, + { + "instruction": "Go to the largest red object", + "targets": [ + "RedSkull", + "TallRedColumn", + "RedTorch", + "ShortRedTorch", + "RedCard", + "BlueArmor", + "ShortRedColumn" + ], + "description": "largest red object" + }, + { + "instruction": "Go to the red pillar", + "targets": [ + "TallRedColumn", + "ShortRedColumn" + ], + "description": "red pillar" + }, + { + "instruction": "Go to the red tall pillar", + "targets": [ + "TallRedColumn" + ], + "description": "red tall pillar" + }, + { + "instruction": "Go to the tall red pillar", + "targets": [ + "TallRedColumn" + ], + "description": "tall red pillar" + }, + { + "instruction": "Go to the tall green pillar", + "targets": [ + "TallGreenColumn" + ], + 
"description": "tall green pillar" + }, + { + "instruction": "Go to the green tall pillar", + "targets": [ + "TallGreenColumn" + ], + "description": "green tall pillar" + }, + { + "instruction": "Go to the green pillar", + "targets": [ + "TallGreenColumn", + "ShortGreenColumn" + ], + "description": "green pillar" + }, + { + "instruction": "Go to the red tall torch", + "targets": [ + "RedTorch" + ], + "description": "red tall torch" + }, + { + "instruction": "Go to the tall red torch", + "targets": [ + "RedTorch" + ], + "description": "tall red torch" + }, + { + "instruction": "Go to the short green torch", + "targets": [ + "ShortGreenTorch" + ], + "description": "short green torch" + }, + { + "instruction": "Go to the largest green object", + "targets": [ + "GreenArmor", + "TallGreenColumn", + "ShortGreenTorch", + "ShortGreenColumn", + "GreenTorch" + ], + "description": "largest green object" + }, + { + "instruction": "Go to the red keycard", + "targets": [ + "RedCard" + ], + "description": "red keycard" + }, + { + "instruction": "Go to the red armor", + "targets": [ + "BlueArmor" + ], + "description": "red armor" + }, + { + "instruction": "Go to the blue short object", + "targets": [ + "ShortBlueTorch" + ], + "description": "blue short object" + }, + { + "instruction": "Go to the short blue object", + "targets": [ + "ShortBlueTorch" + ], + "description": "short blue object" + }, + { + "instruction": "Go to the largest object", + "targets": [ + "ShortBlueTorch", + "BlueTorch", + "ShortGreenTorch", + "GreenTorch", + "RedTorch", + "ShortRedTorch", + "ShortGreenColumn", + "BlueArmor", + "TallGreenColumn", + "ShortRedColumn", + "GreenArmor", + "TallRedColumn" + ], + "description": "largest object" + }, + { + "instruction": "Go to the smallest object", + "targets": [ + "RedSkull", + "YellowSkull", + "ShortBlueTorch", + "ShortGreenTorch", + "YellowCard", + "ShortRedTorch", + "RedCard", + "BlueCard", + "BlueArmor", + "GreenArmor", + "BlueSkull" + ], + "description": 
"smallest object" + }, + { + "instruction": "Go to the green short object", + "targets": [ + "ShortGreenTorch", + "ShortGreenColumn" + ], + "description": "green short object" + }, + { + "instruction": "Go to the short green object", + "targets": [ + "ShortGreenTorch", + "ShortGreenColumn" + ], + "description": "short green object" + }, + { + "instruction": "Go to the tall pillar", + "targets": [ + "TallRedColumn", + "TallGreenColumn" + ], + "description": "tall pillar" + }, + { + "instruction": "Go to the green short torch", + "targets": [ + "ShortGreenTorch" + ], + "description": "green short torch" + }, + { + "instruction": "Go to the short pillar", + "targets": [ + "ShortGreenColumn", + "ShortRedColumn" + ], + "description": "short pillar" + }, + { + "instruction": "Go to the short object", + "targets": [ + "ShortRedTorch", + "ShortRedColumn", + "ShortGreenTorch", + "ShortBlueTorch", + "ShortGreenColumn" + ], + "description": "short object" + }, + { + "instruction": "Go to the largest yellow object", + "targets": [ + "YellowCard", + "YellowSkull" + ], + "description": "largest yellow object" + }, + { + "instruction": "Go to the blue skullkey", + "targets": [ + "BlueSkull" + ], + "description": "blue skullkey" + }, + { + "instruction": "Go to the blue keycard", + "targets": [ + "BlueCard" + ], + "description": "blue keycard" + }, + { + "instruction": "Go to the smallest red object", + "targets": [ + "RedSkull", + "TallRedColumn", + "RedTorch", + "ShortRedTorch", + "RedCard", + "BlueArmor", + "ShortRedColumn" + ], + "description": "smallest red object" + }, + { + "instruction": "Go to the short green pillar", + "targets": [ + "ShortGreenColumn" + ], + "description": "short green pillar" + }, + { + "instruction": "Go to the green short pillar", + "targets": [ + "ShortGreenColumn" + ], + "description": "green short pillar" + }, + { + "instruction": "Go to the yellow keycard", + "targets": [ + "YellowCard" + ], + "description": "yellow keycard" + }, + { + 
"instruction": "Go to the short blue torch", + "targets": [ + "ShortBlueTorch" + ], + "description": "short blue torch" + }, + { + "instruction": "Go to the blue short torch", + "targets": [ + "ShortBlueTorch" + ], + "description": "blue short torch" + }, + { + "instruction": "Go to the yellow skullkey", + "targets": [ + "YellowSkull" + ], + "description": "yellow skullkey" + }, + { + "instruction": "Go to the blue tall torch", + "targets": [ + "BlueTorch" + ], + "description": "blue tall torch" + }, + { + "instruction": "Go to the tall blue torch", + "targets": [ + "BlueTorch" + ], + "description": "tall blue torch" + }, + { + "instruction": "Go to the torch", + "targets": [ + "BlueTorch", + "GreenTorch", + "RedTorch", + "ShortRedTorch", + "ShortGreenTorch", + "ShortBlueTorch" + ], + "description": "torch" + }, + { + "instruction": "Go to the keycard", + "targets": [ + "YellowCard", + "RedCard", + "BlueCard" + ], + "description": "keycard" + }, + { + "instruction": "Go to the skullkey", + "targets": [ + "RedSkull", + "BlueSkull", + "YellowSkull" + ], + "description": "skullkey" + }, + { + "instruction": "Go to the pillar", + "targets": [ + "TallRedColumn", + "TallGreenColumn", + "ShortGreenColumn", + "ShortRedColumn" + ], + "description": "pillar" + }, + { + "instruction": "Go to the armor", + "targets": [ + "GreenArmor", + "BlueArmor" + ], + "description": "armor" + } +] diff --git a/algorithms/a3c/vizdoom_data/instructions_test.json b/algorithms/a3c/vizdoom_data/instructions_test.json new file mode 100644 index 0000000..9420e1b --- /dev/null +++ b/algorithms/a3c/vizdoom_data/instructions_test.json @@ -0,0 +1,114 @@ +[ + { + "instruction": "Go to the green tall torch", + "targets": [ + "GreenTorch" + ], + "description": "green tall torch" + }, + { + "instruction": "Go to the red short pillar", + "targets": [ + "ShortRedColumn" + ], + "description": "red short pillar" + }, + { + "instruction": "Go to the smallest yellow object", + "targets": [ + "YellowCard", + 
"YellowSkull" + ], + "description": "smallest yellow object" + }, + { + "instruction": "Go to the blue torch", + "targets": [ + "ShortBlueTorch", + "BlueTorch" + ], + "description": "blue torch" + }, + { + "instruction": "Go to the red short torch", + "targets": [ + "ShortRedTorch" + ], + "description": "red short torch" + }, + { + "instruction": "Go to the short blue object", + "targets": [ + "ShortBlueTorch" + ], + "description": "short blue object" + }, + { + "instruction": "Go to the largest blue object", + "targets": [ + "ShortBlueTorch", + "BlueTorch", + "BlueCard", + "BlueSkull" + ], + "description": "largest blue object" + }, + { + "instruction": "Go to the short green torch", + "targets": [ + "ShortGreenTorch" + ], + "description": "short green torch" + }, + { + "instruction": "Go to the blue keycard", + "targets": [ + "BlueCard" + ], + "description": "blue keycard" + }, + { + "instruction": "Go to the short red torch", + "targets": [ + "ShortRedTorch" + ], + "description": "short red torch" + }, + { + "instruction": "Go to the short green pillar", + "targets": [ + "ShortGreenColumn" + ], + "description": "short green pillar" + }, + { + "instruction": "Go to the tall green object", + "targets": [ + "TallGreenColumn", + "GreenTorch" + ], + "description": "tall green object" + }, + { + "instruction": "Go to the green armor", + "targets": [ + "GreenArmor" + ], + "description": "green armor" + }, + { + "instruction": "Go to the tall red pillar", + "targets": [ + "TallRedColumn" + ], + "description": "tall red pillar" + }, + { + "instruction": "Go to the tall pillar", + "targets": [ + "TallRedColumn", + "TallGreenColumn" + ], + "description": "tall pillar" + } +] diff --git a/algorithms/a3c/vizdoom_data/instructions_train.json b/algorithms/a3c/vizdoom_data/instructions_train.json new file mode 100644 index 0000000..6f1ffd0 --- /dev/null +++ b/algorithms/a3c/vizdoom_data/instructions_train.json @@ -0,0 +1,482 @@ +[ + { + "instruction": "Go to the short red 
object", + "targets": [ + "ShortRedTorch", + "ShortRedColumn" + ], + "description": "short red object" + }, + { + "instruction": "Go to the red short object", + "targets": [ + "ShortRedTorch", + "ShortRedColumn" + ], + "description": "red short object" + }, + { + "instruction": "Go to the red torch", + "targets": [ + "RedTorch", + "ShortRedTorch" + ], + "description": "red torch" + }, + { + "instruction": "Go to the green torch", + "targets": [ + "ShortGreenTorch", + "GreenTorch" + ], + "description": "green torch" + }, + { + "instruction": "Go to the tall green torch", + "targets": [ + "GreenTorch" + ], + "description": "tall green torch" + }, + { + "instruction": "Go to the smallest green object", + "targets": [ + "GreenArmor", + "TallGreenColumn", + "ShortGreenTorch", + "ShortGreenColumn", + "GreenTorch" + ], + "description": "smallest green object" + }, + { + "instruction": "Go to the short torch", + "targets": [ + "ShortRedTorch", + "ShortGreenTorch", + "ShortBlueTorch" + ], + "description": "short torch" + }, + { + "instruction": "Go to the tall torch", + "targets": [ + "RedTorch", + "GreenTorch", + "BlueTorch" + ], + "description": "tall torch" + }, + { + "instruction": "Go to the short red pillar", + "targets": [ + "ShortRedColumn" + ], + "description": "short red pillar" + }, + { + "instruction": "Go to the red tall object", + "targets": [ + "TallRedColumn", + "RedTorch" + ], + "description": "red tall object" + }, + { + "instruction": "Go to the tall red object", + "targets": [ + "TallRedColumn", + "RedTorch" + ], + "description": "tall red object" + }, + { + "instruction": "Go to the smallest blue object", + "targets": [ + "ShortBlueTorch", + "BlueTorch", + "BlueCard", + "BlueSkull" + ], + "description": "smallest blue object" + }, + { + "instruction": "Go to the yellow object", + "targets": [ + "YellowCard", + "YellowSkull" + ], + "description": "yellow object" + }, + { + "instruction": "Go to the tall object", + "targets": [ + "TallRedColumn", + 
"TallGreenColumn", + "BlueTorch", + "RedTorch", + "GreenTorch" + ], + "description": "tall object" + }, + { + "instruction": "Go to the red skullkey", + "targets": [ + "RedSkull" + ], + "description": "red skullkey" + }, + { + "instruction": "Go to the red object", + "targets": [ + "ShortRedTorch", + "ShortRedColumn", + "TallRedColumn", + "RedTorch", + "RedSkull", + "RedCard", + "BlueArmor" + ], + "description": "red object" + }, + { + "instruction": "Go to the green object", + "targets": [ + "GreenArmor", + "TallGreenColumn", + "ShortGreenTorch", + "ShortGreenColumn", + "GreenTorch" + ], + "description": "green object" + }, + { + "instruction": "Go to the blue object", + "targets": [ + "ShortBlueTorch", + "BlueTorch", + "BlueCard", + "BlueSkull" + ], + "description": "blue object" + }, + { + "instruction": "Go to the green tall object", + "targets": [ + "TallGreenColumn", + "GreenTorch" + ], + "description": "green tall object" + }, + { + "instruction": "Go to the tall blue object", + "targets": [ + "BlueTorch" + ], + "description": "tall blue object" + }, + { + "instruction": "Go to the blue tall object", + "targets": [ + "BlueTorch" + ], + "description": "blue tall object" + }, + { + "instruction": "Go to the largest red object", + "targets": [ + "RedSkull", + "TallRedColumn", + "RedTorch", + "ShortRedTorch", + "RedCard", + "BlueArmor", + "ShortRedColumn" + ], + "description": "largest red object" + }, + { + "instruction": "Go to the red pillar", + "targets": [ + "TallRedColumn", + "ShortRedColumn" + ], + "description": "red pillar" + }, + { + "instruction": "Go to the red tall pillar", + "targets": [ + "TallRedColumn" + ], + "description": "red tall pillar" + }, + { + "instruction": "Go to the tall green pillar", + "targets": [ + "TallGreenColumn" + ], + "description": "tall green pillar" + }, + { + "instruction": "Go to the green tall pillar", + "targets": [ + "TallGreenColumn" + ], + "description": "green tall pillar" + }, + { + "instruction": "Go to the 
green pillar", + "targets": [ + "TallGreenColumn", + "ShortGreenColumn" + ], + "description": "green pillar" + }, + { + "instruction": "Go to the red tall torch", + "targets": [ + "RedTorch" + ], + "description": "red tall torch" + }, + { + "instruction": "Go to the tall red torch", + "targets": [ + "RedTorch" + ], + "description": "tall red torch" + }, + { + "instruction": "Go to the largest green object", + "targets": [ + "GreenArmor", + "TallGreenColumn", + "ShortGreenTorch", + "ShortGreenColumn", + "GreenTorch" + ], + "description": "largest green object" + }, + { + "instruction": "Go to the red keycard", + "targets": [ + "RedCard" + ], + "description": "red keycard" + }, + { + "instruction": "Go to the red armor", + "targets": [ + "BlueArmor" + ], + "description": "red armor" + }, + { + "instruction": "Go to the blue short object", + "targets": [ + "ShortBlueTorch" + ], + "description": "blue short object" + }, + { + "instruction": "Go to the largest object", + "targets": [ + "ShortBlueTorch", + "ShortGreenTorch", + "GreenTorch", + "RedTorch", + "ShortRedTorch", + "ShortGreenColumn", + "BlueArmor", + "TallGreenColumn", + "ShortRedColumn", + "GreenArmor", + "TallRedColumn" + ], + "description": "largest object" + }, + { + "instruction": "Go to the smallest object", + "targets": [ + "RedSkull", + "YellowSkull", + "ShortBlueTorch", + "ShortGreenTorch", + "YellowCard", + "ShortRedTorch", + "RedCard", + "BlueCard", + "BlueArmor", + "GreenArmor", + "BlueSkull" + ], + "description": "smallest object" + }, + { + "instruction": "Go to the green short object", + "targets": [ + "ShortGreenTorch", + "ShortGreenColumn" + ], + "description": "green short object" + }, + { + "instruction": "Go to the short green object", + "targets": [ + "ShortGreenTorch", + "ShortGreenColumn" + ], + "description": "short green object" + }, + { + "instruction": "Go to the green short torch", + "targets": [ + "ShortGreenTorch" + ], + "description": "green short torch" + }, + { + "instruction": 
"Go to the short pillar", + "targets": [ + "ShortGreenColumn", + "ShortRedColumn" + ], + "description": "short pillar" + }, + { + "instruction": "Go to the short object", + "targets": [ + "ShortRedTorch", + "ShortRedColumn", + "ShortGreenTorch", + "ShortBlueTorch", + "ShortGreenColumn" + ], + "description": "short object" + }, + { + "instruction": "Go to the largest yellow object", + "targets": [ + "YellowCard", + "YellowSkull" + ], + "description": "largest yellow object" + }, + { + "instruction": "Go to the blue skullkey", + "targets": [ + "BlueSkull" + ], + "description": "blue skullkey" + }, + { + "instruction": "Go to the smallest red object", + "targets": [ + "RedSkull", + "TallRedColumn", + "RedTorch", + "ShortRedTorch", + "RedCard", + "BlueArmor", + "ShortRedColumn" + ], + "description": "smallest red object" + }, + { + "instruction": "Go to the green short pillar", + "targets": [ + "ShortGreenColumn" + ], + "description": "green short pillar" + }, + { + "instruction": "Go to the yellow keycard", + "targets": [ + "YellowCard" + ], + "description": "yellow keycard" + }, + { + "instruction": "Go to the short blue torch", + "targets": [ + "ShortBlueTorch" + ], + "description": "short blue torch" + }, + { + "instruction": "Go to the blue short torch", + "targets": [ + "ShortBlueTorch" + ], + "description": "blue short torch" + }, + { + "instruction": "Go to the yellow skullkey", + "targets": [ + "YellowSkull" + ], + "description": "yellow skullkey" + }, + { + "instruction": "Go to the blue tall torch", + "targets": [ + "BlueTorch" + ], + "description": "blue tall torch" + }, + { + "instruction": "Go to the tall blue torch", + "targets": [ + "BlueTorch" + ], + "description": "tall blue torch" + }, + { + "instruction": "Go to the torch", + "targets": [ + "BlueTorch", + "GreenTorch", + "RedTorch", + "ShortRedTorch", + "ShortGreenTorch", + "ShortBlueTorch" + ], + "description": "torch" + }, + { + "instruction": "Go to the keycard", + "targets": [ + "YellowCard", + 
"RedCard", + "BlueCard" + ], + "description": "keycard" + }, + { + "instruction": "Go to the skullkey", + "targets": [ + "RedSkull", + "BlueSkull", + "YellowSkull" + ], + "description": "skullkey" + }, + { + "instruction": "Go to the pillar", + "targets": [ + "TallRedColumn", + "TallGreenColumn", + "ShortGreenColumn", + "ShortRedColumn" + ], + "description": "pillar" + }, + { + "instruction": "Go to the armor", + "targets": [ + "GreenArmor", + "BlueArmor" + ], + "description": "armor" + } +] diff --git a/algorithms/a3c/vizdoom_data/object_sizes.txt b/algorithms/a3c/vizdoom_data/object_sizes.txt new file mode 100644 index 0000000..f82e3b5 --- /dev/null +++ b/algorithms/a3c/vizdoom_data/object_sizes.txt @@ -0,0 +1,53 @@ +50 Stalagtite 2812 127 +46 TallRedColumn 2667 127 +43 TallGreenColumn 2632 127 +34 RedTorch 2487 127 +29 BlueTorch 2409 127 +21 TorchTree 2245 127 +33 SkullColumn 2186 127 +45 ShortRedColumn 2052 127 +19 HeartColumn 2016 127 +35 GreenTorch 1963 127 +44 ShortGreenColumn 1911 127 +12 HeadCandles 1845 127 +14 RocketBox 1729 127 +48 Candelabra 1533 127 +8 RadSuit 1413 127 +4 Allmap 1404 127 +2 RocketLauncher 1237 127 +51 Chaingun 1007 127 +52 Medikit 971 127 +28 CellPack 917 127 +22 Backpack 880 127 +26 HeadsOnAStick 877 127 +9 BlueArmor 851 127 +47 Chainsaw 835 127 +36 ShortRedTorch 828 127 +3 PlasmaRifle 822 127 +37 ShortGreenTorch 813 127 +38 ShortBlueTorch 805 127 +17 HeadOnAStick 791 127 +32 Soulsphere 733 127 +49 InvulnerabilitySphere 733 127 +53 Megasphere 733 127 +42 ClipBox 696 127 +20 GreenArmor 616 127 +39 Infrared 537 127 +31 SuperShotgun 505 127 +41 Shotgun 451 127 +5 Berserk 408 127 +10 ShellBox 395 127 +15 ArmorBonus 368 127 +54 Stimpack 360 127 +11 BlueCard 333 127 +23 YellowCard 315 127 +27 RedCard 315 127 +18 RocketAmmo 311 127 +25 HealthBonus 292 127 +16 Cell 268 127 +0 YellowSkull 236 127 +1 BlueSkull 236 127 +6 RedSkull 236 127 +30 Candlestick 168 127 +40 Shell 148 127 +7 Clip 112 127 diff --git a/algorithms/a3c/vizdoom_maps/room.wad 
b/algorithms/a3c/vizdoom_maps/room.wad new file mode 100644 index 0000000..c9c1281 Binary files /dev/null and b/algorithms/a3c/vizdoom_maps/room.wad differ diff --git a/algorithms/a3c/vizdoom_utils/constants.py b/algorithms/a3c/vizdoom_utils/constants.py new file mode 100644 index 0000000..d4d3af6 --- /dev/null +++ b/algorithms/a3c/vizdoom_utils/constants.py @@ -0,0 +1,34 @@ +# Difference in size of the objects to be considered larger or smaller +SIZE_THRESHOLD = 100 + +# Maximum distance from the object to receive a reward +REWARD_THRESHOLD_DISTANCE = 40 + +# Rewards for reaching the correct and wrong objects +CORRECT_OBJECT_REWARD = 1.0 +WRONG_OBJECT_REWARD = -0.2 + +# Size of the map +MAP_SIZE_X = 384 +MAP_SIZE_Y = 384 + +# Map offsets in doom coordinates +Y_OFFSET = 320 +X_OFFSET = 0 + +# Margin to avoid objects overlapping with the walls +MARGIN = 32 + +# Distance between y-coordinates of two objects in Easy and Medium environments +OBJECT_Y_DIST = 64 + +# X-coordinate of all objects in the Easy environment +EASY_ENV_OBJECT_X = 256 + +# Range of x coordinates of all objects in the Medium environment +MEDIUM_ENV_OBJECT_X_MIN = 192 +MEDIUM_ENV_OBJECT_X_MAX = 352 + +# Minimum distance between any two objects or an object and +# the agent in the Hard environment +HARD_ENV_OBJ_DIST_THRESHOLD = 90 diff --git a/algorithms/a3c/vizdoom_utils/doom.py b/algorithms/a3c/vizdoom_utils/doom.py new file mode 100644 index 0000000..92608b2 --- /dev/null +++ b/algorithms/a3c/vizdoom_utils/doom.py @@ -0,0 +1,132 @@ +from vizdoom import * +import re +import cv2 + +def set_doom_configuration(game, params): + game.set_doom_scenario_path(params.scenario_path) + + if (params.visualize): + # Use a bigger screen size when visualizing + game.set_screen_resolution(ScreenResolution.RES_800X450) + else: + # Use a smaller screen size for faster simulation + game.set_screen_resolution(ScreenResolution.RES_400X225) + + game.set_screen_format(ScreenFormat.RGB24) + 
game.set_depth_buffer_enabled(True)
+    game.set_labels_buffer_enabled(True)
+    game.set_automap_buffer_enabled(True)
+
+    # sets other rendering options
+    game.set_render_hud(False)
+    game.set_render_minimal_hud(False)  # if hud is enabled
+    game.set_render_crosshair(False)
+    game.set_render_weapon(False)
+    game.set_render_decals(False)
+    game.set_render_particles(False)
+    game.set_render_effects_sprites(False)
+    game.set_render_messages(False)
+    game.set_render_corpses(False)
+
+    game.add_available_button(Button.TURN_LEFT)
+    game.add_available_button(Button.TURN_RIGHT)
+    game.add_available_button(Button.MOVE_FORWARD)
+
+    game.set_episode_timeout(30000)
+
+    # makes episodes start after 10 tics
+    game.set_episode_start_time(10)
+
+    # makes the window appear (turned on by default)
+    game.set_window_visible(params.visualize)
+
+    if params.interactive:
+        game.set_mode(Mode.SPECTATOR)
+    else:
+        game.set_mode(Mode.PLAYER)
+
+    return game
+
+
+def get_doom_coordinates(x, y):
+    return int(x) * 256 * 256, int(y) * 256 * 256
+
+
+def get_world_coordinates(x):
+    return x / (256 * 256)
+
+
+def get_agent_location(game):
+    x = get_world_coordinates(game.get_game_variable(GameVariable.USER3))
+    y = get_world_coordinates(game.get_game_variable(GameVariable.USER4))
+    return x, y
+
+
+def spawn_object(game, object_id, x, y):
+    x_pos, y_pos = get_doom_coordinates(x, y)
+    # call the spawn function twice because vizdoom objects sometimes fail to
+    # appear if spawned only once, for some unknown reason
+    for _ in range(2):
+        game.send_game_command("pukename spawn_object_by_id_and_location \
+                               %i %i %i" % (object_id, x_pos, y_pos))
+        pause_game(game, 1)
+
+
+def spawn_agent(game, x, y, orientation):
+    x_pos, y_pos = get_doom_coordinates(x, y)
+    game.send_game_command("pukename set_position %i %i %i" %
+                           (x_pos, y_pos, orientation))
+
+
+def pause_game(game, steps):
+    # step the game forward for the given number of tics with a no-op action
+    for _ in range(steps):
+        game.make_action([False, False, False])
+
+
+def split_object(object_string):
split_word = re.findall('[A-Z][^A-Z]*', object_string)
+    split_word.reverse()
+    return split_word
+
+
+def get_l2_distance(x1, x2, y1, y2):
+    """
+    Computes the L2 (Euclidean) distance between the points (x1, x2) and (y1, y2)
+    """
+    return ((x1-y1)**2 + (x2-y2)**2)**0.5
+
+
+def process_screen(screen, height, width):
+    """
+    Resize the screen to (channels, height, width) if it is not already that shape.
+    """
+    if screen.shape != (3, height, width):
+        screen = cv2.resize(screen, (width, height),
+                            interpolation=cv2.INTER_AREA).transpose(2, 0, 1)
+    return screen
+
+
+class DoomObject(object):
+    def __init__(self, *args):
+        self.name = ''.join(list(reversed(args)))
+        self.type = args[0]
+
+        try:
+            # Bug in ViZDoom: BlueArmor is actually rendered red
+            if self.name == 'BlueArmor':
+                self.color = 'Red'
+            else:
+                self.color = args[1]
+        except IndexError:
+            self.color = None
+
+        try:
+            self.relative_size = args[2]
+        except IndexError:
+            self.relative_size = None
+
+        try:
+            self.absolute_size = args[3]
+        except IndexError:
+            self.absolute_size = None
diff --git a/algorithms/a3c/vizdoom_utils/points.py b/algorithms/a3c/vizdoom_utils/points.py
new file mode 100644
index 0000000..7581b23
--- /dev/null
+++ b/algorithms/a3c/vizdoom_utils/points.py
@@ -0,0 +1,217 @@
+"""
+This is a Python implementation of the poisson-disc sampling algorithm.
+Poisson-disc code is borrowed from +IHautal at https://github.com/IHautaI/poisson-disc + +Poisson-disc algorithm produces points in a grid, +but no closer to each other than a specified minimum distance + +For more details about this algorithm : +http://www.cs.ubc.ca/~rbridson/docs/bridson-siggraph07-poissondisk.pdf +""" + +import random +from math import sqrt, pi, sin, cos +from itertools import product + + +def generate(grid): + """ + build the grid for generating the points + """ + def func(point): + new = [random.choice([random.uniform(-grid.r*2, 0), + random.uniform(0, grid.r*2)]) for _ in range(len(point))] + return tuple(new[i] + point[i] for i in range(len(point))) + return func + + +def generate_points(r, length, width, n_points, rand=None): + """ + generate n_points over a grid of a given length and width + """ + grid = Grid(r, length, width) + grid.generate = generate(grid) + if rand is None: + rand = (random.uniform(0, length), random.uniform(0, width)) + + for i in range(100): + data = grid.poisson(rand, n_points) + if len(data) != n_points: + continue + else: + return data + + +class Grid: + """ + class for filling a rectangular prism of dimension >= 2 + with poisson disc samples spaced at least r apart + and k attempts per active sample + override Grid.distance to change + distance metric used and get different forms + of 'discs' + """ + def __init__(self, r, *size): + self.r = r + + self.size = size + self.dim = len(size) + + self.cell_size = r/(sqrt(self.dim)) + + self.widths = [int(size[k]/self.cell_size)+1 for k in range(self.dim)] + + nums = product(*(range(self.widths[k]) for k in range(self.dim))) + + self.cells = {num: -1 for num in nums} + self.samples = [] + self.active = [] + + def clear(self): + """ + resets the grid + active points and + sample points + """ + self.samples = [] + self.active = [] + + for item in self.cells: + self.cells[item] = -1 + + def generate(self, point): + """ + generates new points + in an annulus between + 
self.r, 2*self.r + """ + + rad = random.triangular(self.r, 2*self.r, .3*(2*self.r - self.r)) + # was random.uniform(self.r, 2*self.r) but I think + # this may be closer to the correct distribution + # but easier to build + + angs = [random.uniform(0, 2*pi)] + + if self.dim > 2: + angs.extend(random.uniform(-pi/2, pi/2) for _ in range(self.dim-2)) + + angs[0] = 2*angs[0] + + return self.convert(point, rad, angs) + + def poisson(self, seed, k=30): + """ + generates a set of poisson disc samples + """ + self.clear() + + self.samples.append(seed) + self.active.append(0) + self.update(seed, 0) + + while len(self.samples) < k and self.active: + + idx = random.choice(self.active) + point = self.samples[idx] + new_point = self.make_points(k, point) + + if new_point: + self.samples.append(tuple(new_point)) + self.active.append(len(self.samples)-1) + self.update(new_point, len(self.samples)-1) + else: + self.active.remove(idx) + + return self.samples + + def make_points(self, k, point): + """ + uses generate to make up to + k new points, stopping + when it finds a good sample + using self.check + """ + n = k + + while n: + new_point = self.generate(point) + if self.check(point, new_point): + return new_point + + n -= 1 + + return False + + def check(self, point, new_point): + """ + checks the neighbors of the point + and the new_point + against the new_point + returns True if none are closer than r + """ + for i in range(self.dim): + if not (0 < new_point[i] < self.size[i] or + self.cellify(new_point) == -1): + return False + + for item in self.neighbors(self.cellify(point)): + if self.distance(self.samples[item], new_point) < self.r**2: + return False + + for item in self.neighbors(self.cellify(new_point)): + if self.distance(self.samples[item], new_point) < self.r**2: + return False + + return True + + def convert(self, point, rad, angs): + """ + converts the random point + to rectangular coordinates + from radial coordinates centered + on the active point + """ + 
new_point = [point[0] + rad*cos(angs[0]), point[1] + rad*sin(angs[0])]
+        if len(angs) > 1:
+            new_point.extend(point[i+1] + rad*sin(angs[i])
+                             for i in range(1, len(angs)))
+        return new_point
+
+    def cellify(self, point):
+        """
+        returns the cell in which the point falls
+        """
+        return tuple(point[i]//self.cell_size for i in range(self.dim))
+
+    def distance(self, tup1, tup2):
+        """
+        returns squared distance between two points
+        """
+        return sum((tup1[k] - tup2[k])**2 for k in range(self.dim))
+
+    def cell_distance(self, tup1, tup2):
+        """
+        returns True if the L1 distance between
+        the two tuples is at most 2
+        """
+        return sum(abs(tup1[k]-tup2[k]) for k in range(self.dim)) <= 2
+
+    def neighbors(self, cell):
+        """
+        finds all occupied cells within
+        an L1 distance of 2 from the given cell
+        """
+        return (self.cells[tup] for tup in self.cells
+                if self.cells[tup] != -1 and
+                self.cell_distance(cell, tup))
+
+    def update(self, point, index):
+        """
+        updates the grid with the new point
+        """
+        self.cells[self.cellify(point)] = index
+
+    def __str__(self):
+        return self.cells.__str__()
diff --git a/examples/ai2thor_examples.py b/examples/ai2thor_examples.py
new file mode 100644
index 0000000..e6a41d3
--- /dev/null
+++ b/examples/ai2thor_examples.py
@@ -0,0 +1,161 @@
+import threading
+import time
+
+import ai2thor.controller
+
+
+def run_simple_example():
+    """
+    Taken from here: http://ai2thor.allenai.org/tutorials/examples
+    """
+    controller = ai2thor.controller.Controller()
+    controller.start()
+
+    # Kitchens: FloorPlan1 - FloorPlan30
+    # Living rooms: FloorPlan201 - FloorPlan230
+    # Bedrooms: FloorPlan301 - FloorPlan330
+    # Bathrooms: FloorPlan401 - FloorPlan430
+
+    controller.reset('FloorPlan28')
+    controller.step(dict(action='Initialize', gridSize=0.25))
+
+    event = controller.step(dict(action='MoveAhead'))
+
+    # Numpy Array - shape (width, height, channels), channels are in RGB order
+    event.frame
+
+    # Numpy Array in BGR order suitable for use with OpenCV
+    event.cv2img
+
+    # current metadata dictionary that includes the state of the scene
+    event.metadata
+
+    controller.stop()
+
+def run_calling_complex_actions():
+    """
+    Examples of how to interact with environment internals, e.g. picking up, placing and
+    opening objects.
+    Taken from here: http://ai2thor.allenai.org/tutorials/examples
+    """
+    controller = ai2thor.controller.Controller()
+    controller.start()
+
+    controller.reset('FloorPlan28')
+    controller.step(dict(action='Initialize', gridSize=0.25))
+
+    controller.step(dict(action='Teleport', x=-1.25, y=1.00, z=-1.5))
+    controller.step(dict(action='LookDown'))
+    event = controller.step(dict(action='Rotate', rotation=90))
+    # In FloorPlan28, the agent should now be looking at a mug
+    for obj in event.metadata['objects']:
+        if obj['visible'] and obj['pickupable'] and obj['objectType'] == 'Mug':
+            event = controller.step(dict(action='PickupObject', objectId=obj['objectId']),
+                                    raise_for_failure=True)
+            mug_object_id = obj['objectId']
+            break
+
+    # the agent now has the Mug in its inventory
+    # to put it into the Microwave, we need to open the microwave first
+
+    event = controller.step(dict(action='LookUp'))
+    for obj in event.metadata['objects']:
+        if obj['visible'] and obj['openable'] and obj['objectType'] == 'Microwave':
+            event = controller.step(dict(action='OpenObject', objectId=obj['objectId']),
+                                    raise_for_failure=True)
+            receptacle_object_id = obj['objectId']
+            break
+
+    event = controller.step(dict(action='MoveRight'), raise_for_failure=True)
+    event = controller.step(dict(action='PutObject',
+                                 receptacleObjectId=receptacle_object_id,
+                                 objectId=mug_object_id),
+                            raise_for_failure=True)
+
+    # close the microwave
+    event = controller.step(dict(
+        action='CloseObject',
+        objectId=receptacle_object_id), raise_for_failure=True)
+
+    controller.stop()
+
+def run_multithreaded():
+    """
+    Stress test that also shows how multi-threading can be used to greatly speed up
+    processing, especially to support the rendering of class, object and depth images.
+    Adapted from here: http://ai2thor.allenai.org/tutorials/examples
+
+    Extra analysis of the cost of requesting extra Unity render information, which is
+    important to know when training models:
+    ~67 FPS with 1 thread no extra info
+    ~61 FPS with 1 thread added class info
+    ~18 FPS with 1 thread added Object info on top
+    ~17 FPS with 1 thread added Depth info on top
+
+    ~70 FPS with 2 threads and no depth, class and object image
+    ~15 FPS with 2 threads and all three of those
+
+    Good examples of how to multi-thread are below
+    """
+    thread_count = 3
+
+    def run(thread_num):
+        """
+        Runs 5 iterations of 50 steps of the environment with the different rendering options
+        :param thread_num: (int) index of the current thread, used for logging
+        """
+        env = ai2thor.controller.Controller()
+        env.start()
+
+        render_depth_image, render_class_image, render_object_image = False, False, False
+
+        for i in range(5):
+            t_start = time.time()
+            env.reset('FloorPlan1')
+            env.step({'action': 'Initialize', 'gridSize': 0.25})
+
+            # Compare the performance with all the extra added information
+            # Big take away is that Object instance information makes it much slower
+            if i == 2:
+                render_class_image = True
+                print('Thread num: {}. Added Class info'.format(thread_num))
+            elif i == 3:
+                render_object_image = True
+                print('Thread num: {}. Added Object info'.format(thread_num))
+            elif i == 4:
+                render_depth_image = True
+                print('Thread num: {}. Added Depth info'.format(thread_num))
+
+            env.step(dict(action='Initialize',
+                          gridSize=0.25,
+                          renderDepthImage=render_depth_image,
+                          renderClassImage=render_class_image,
+                          renderObjectImage=render_object_image))
+            print('Thread num: {}. init time: {}'.format(thread_num, time.time() - t_start))
+            t_start_total = time.time()
+            num_steps = 50
+            for _ in range(num_steps):
+                env.step({'action': 'MoveAhead'})
+            total_time = time.time() - t_start_total
+            print('Thread num: {}. Total time for 50 steps: {}. {:.2f} fps'.
+ format(thread_num, total_time, num_steps / total_time)) + env.stop() + + threads = [threading.Thread(target=run, args=(thread_num, )) + for thread_num in range(thread_count)] + for thread in threads: + thread.daemon = True + thread.start() + time.sleep(1) + + for thread in threads: + # calling join() in a loop/timeout to allow for Python 2.7 + # to be interrupted with SIGINT + while thread.isAlive(): + thread.join(1) + + print('done') + +if __name__ == '__main__': + run_simple_example() + run_calling_complex_actions() + run_multithreaded() diff --git a/examples/ai2thor_inbuilt_interactive_mode.py b/examples/ai2thor_inbuilt_interactive_mode.py new file mode 100644 index 0000000..2dbd226 --- /dev/null +++ b/examples/ai2thor_inbuilt_interactive_mode.py @@ -0,0 +1,31 @@ +""" +Control ai2thor in interactive mode with a unity build path. Needs to be run from terminal. +If you don't provide a real path, it will try to download the 600+mb ai2thor assets. + +Only works from terminal! +""" + +import argparse +import os + +import ai2thor.controller + +parser = argparse.ArgumentParser(description='Run ai2thor in interactive mode') +parser.add_argument('--unity-build-name', type=str, + default='build_bowls_vs_cups_fp1_201_301_4011_v_0.1.x86_64', + help='Path to a unity build with file ending in .x86_64') +args = parser.parse_args() + +controller = ai2thor.controller.Controller() +# file must be in gym_ai2thor/build_files +unity_build_abs_file_path = os.path.abspath(os.path.join(__file__, '../../gym_ai2thor/build_files', + args.unity_build_name)) +print('Build file path at: {}'.format(unity_build_abs_file_path)) +if not os.path.exists(unity_build_abs_file_path): + raise ValueError('Unity build file does not exist') +controller.local_executable_path = unity_build_abs_file_path +controller.start() + +controller.reset('FloorPlan201') +controller.step(dict(action='Initialize', gridSize=0.05, cameraY=-0.8, continuous=True)) +controller.interact() diff --git 
a/examples/pdb_interactive_and_check_bbox_reward_functions.py b/examples/pdb_interactive_and_check_bbox_reward_functions.py new file mode 100644 index 0000000..415b658 --- /dev/null +++ b/examples/pdb_interactive_and_check_bbox_reward_functions.py @@ -0,0 +1,103 @@ +""" +Copied and modified from +http://ai2thor.allenai.org/tutorials/examples +Added helper functions to draw bounding boxes retrieved from ai2thor +Also can test some reward functions here +""" + +import numpy as np +import skimage.color, skimage.transform +import matplotlib as mpl +mpl.use('TkAgg') # or whatever other backend that you want +from matplotlib import pyplot as plt +import ai2thor.controller + +from gym_ai2thor.task_utils import check_if_focus_and_close_enough_to_object_type, \ + show_bounding_boxes, show_instance_segmentation + + +def rgb2gray(rgb): + return np.dot(rgb[..., :3], [0.299, 0.587, 0.114]) + +if __name__ == '__main__': + # Kitchens: FloorPlan1 - FloorPlan30 + # Living rooms: FloorPlan201 - FloorPlan230 + # Bedrooms: FloorPlan301 - FloorPlan330 + # Bathrooms: FloorPLan401 - FloorPlan430 + + controller = ai2thor.controller.Controller() + controller.start() + controller.reset('FloorPlan28') + # event = controller.step(dict(action='Initialize', gridSize=0.25, + event = controller.step(dict(action='Initialize', gridSize=0.05, + renderDepthImage=True, + renderClassImage=True, + renderObjectImage=True)) + + # Numpy Array - shape (width, height, channels), channels are in RGB order + print(event.frame) + print(event.frame.shape) + + # to the mug + # event = controller.step(dict(action='RotateRight')) + # event = controller.step(dict(action='RotateRight')) + # event = controller.step(dict(action='MoveAhead')) + # event = controller.step(dict(action='MoveAhead')) + # event = controller.step(dict(action='RotateRight')) + # event = controller.step(dict(action='MoveAhead')) # two more here + # event = controller.step(dict(action='RotateLeft')) + # event = 
controller.step(dict(action='MoveAhead')) + # event = controller.step(dict(action='MoveAhead')) + # event = controller.step(dict(action='MoveAhead')) + # event = controller.step(dict(action='MoveAhead')) + + # to the apple with gridSize=0.05 + event = controller.step(dict(action='RotateRight')) + event = controller.step(dict(action='RotateRight')) + event = controller.step(dict(action='MoveAhead')) + event = controller.step(dict(action='MoveAhead')) + event = controller.step(dict(action='RotateRight')) + event = controller.step(dict(action='MoveAhead')) + event = controller.step(dict(action='MoveAhead')) + event = controller.step(dict(action='MoveAhead')) + event = controller.step(dict(action='RotateLeft')) + event = controller.step(dict(action='MoveAhead')) + event = controller.step(dict(action='MoveAhead')) + event = controller.step(dict(action='MoveAhead')) + event = controller.step(dict(action='MoveAhead')) + event = controller.step(dict(action='MoveAhead')) + event = controller.step(dict(action='LookDown')) + all_object_names = [obj['name'] for obj in event.metadata['objects']] + visible_objects = [obj for obj in event.metadata['objects'] if obj['visible']] + # for i in range(50): event = controller.step(dict(action='MoveAhead')) + + import pdb;pdb.set_trace() + + # 98 75 126 109 - mug a bit to left middle centre + # 165 75 194 109 - mug a bit to middle right bottom + show_instance_segmentation(event, ['Mug', 'Apple']) + show_bounding_boxes(event) + show_bounding_boxes(event, ['mug', 'cup', 'Apple'], lines_between_boxes_and_crosshair=True) + bool_list = check_if_focus_and_close_enough_to_object_type(event, 'Apple', verbose=True) + # bool_list = check_if_focus_and_close_enough_to_object_type(event, 'Apple', verbose=True, + # distance_threshold_3d=0.8) + + # # Show preprocessed image + resolution = (128, 128) + img = skimage.transform.resize(event.frame, resolution) + plt.imshow(img) # show colour pre-processed (works in 0-1 range) + plt.show() + img = 
img.astype(np.float32) + gray = rgb2gray(img) + gray_unsqueezed = np.expand_dims(gray, 0) + gray_3_channel = np.concatenate([gray_unsqueezed, gray_unsqueezed, gray_unsqueezed]) + gray_3_channel = np.moveaxis(gray_3_channel, 0, 2) + plt.imshow(gray_3_channel) + plt.show() + + # Can walk and step through environment interactively by copy/pasting commands and + # deciding when to show bounding boxes. Can also check reward functions and have full control. + import pdb;pdb.set_trace() + # try pasting + # event = controller.step(dict(action='MoveAhead')) + # into the console diff --git a/examples/random_walk.py b/examples/random_walk.py index e741560..3127bba 100644 --- a/examples/random_walk.py +++ b/examples/random_walk.py @@ -4,6 +4,7 @@ """ import time +import gym from gym_ai2thor.envs.ai2thor_env import AI2ThorEnv N_EPISODES = 3 diff --git a/examples/task_ground_continuous_random_walk.py b/examples/task_ground_continuous_random_walk.py new file mode 100644 index 0000000..bf670d1 --- /dev/null +++ b/examples/task_ground_continuous_random_walk.py @@ -0,0 +1,58 @@ +""" +Here we try different cameraY (to bring the agent to the ground), gridSize (the amount of movement), + continuous_movement (not just 90 degree rotations and can move diagonally) and finally a specific unity + build. For ours we placed many cups on the ground. Still picks random actions but shows how much + we can vary the environment. +""" +import time +import argparse + +from gym_ai2thor.envs.ai2thor_env import AI2ThorEnv + +parser = argparse.ArgumentParser(description='Provide build path') +parser.add_argument('--build-file-name', required=True, + help='Required Unity build path for custom scenes (e.g. cups on ground)' + 'necessary build file with folder should be in gym_ai2thor/build_files.' 
+                         ' Also needs the build folder with a matching name in the same path as the file')
+args = parser.parse_args()
+
+
+if __name__ == '__main__':
+    config_dict = {
+        'pickup_put_interaction': True,
+        'open_close_interaction': False,  # disable opening/closing objects
+        'openable_objects': [],  # disable opening objects another way
+        'pickup_objects': [
+            "Cup"
+        ],
+        'scene_id': 'FloorPlan1',  # let's try a different room
+        'grayscale': True,
+        'resolution': [128, 128],
+        'cameraY': -0.85,
+        'gridSize': 0.1,  # 0.01
+        'continuous_movement': True,
+        'build_file_name': args.build_file_name,
+        'task': {
+            'task_name': 'PickUpTask',
+            'target_object': 'Cup'
+        }
+    }
+
+    # Input config_dict to env which will overwrite a few values given in the default config_file.
+    # Therefore, a few harmless warnings are expected
+    env = AI2ThorEnv(config_dict=config_dict)
+    max_episode_length = env.task.max_episode_length
+    N_EPISODES = 3
+    for episode in range(N_EPISODES):
+        start = time.time()
+        state = env.reset()
+        for step_num in range(max_episode_length):
+            action = env.action_space.sample()
+            state, reward, done, _ = env.step(action)
+            if done:
+                break
+
+            if step_num + 1 > 0 and (step_num + 1) % 100 == 0:
+                print('Episode: {}. Step: {}/{}. Time taken for 100: {:.3f}s'.format(episode + 1,
+                      (step_num + 1), max_episode_length, time.time() - start))
+                start = time.time()
diff --git a/examples/task_variation_random_walk.py b/examples/task_variation_random_walk.py
index bb0a0d8..d585c22 100644
--- a/examples/task_variation_random_walk.py
+++ b/examples/task_variation_random_walk.py
@@ -26,7 +26,7 @@
     }
     # Input config_dict to env which will overwrite a few values given in the default config_file.
-    # Therefore, a few warnings will occur
+    # Therefore, a few harmless warnings are expected
     env = AI2ThorEnv(config_dict=config_dict)
     max_episode_length = env.task.max_episode_length
     N_EPISODES = 3
@@ -40,6 +40,6 @@
                 break
             if step_num + 1 > 0 and (step_num + 1) % 100 == 0:
-                print('Episode: {}. Step: {}/{}.
Time taken: {:.3f}s'.format(episode + 1, + print('Episode: {}. Step: {}/{}. Time taken for 100: {:.3f}s'.format(episode + 1, (step_num + 1), max_episode_length, time.time() - start)) start = time.time() diff --git a/gym_ai2thor/__init__.py b/gym_ai2thor/__init__.py index b3d8d79..97860a1 100644 --- a/gym_ai2thor/__init__.py +++ b/gym_ai2thor/__init__.py @@ -1,4 +1,4 @@ from gym.envs.registration import register -register(id='ai2thor_env-v0', - entry_point='gym_foo.envs:AI2ThorEnv') +register(id='ai2thor-v0', + entry_point='gym_ai2thor.envs:AI2ThorEnv') diff --git a/gym_ai2thor/config_files/NL_lookat_bowls_vs_cups_fp1_config.json b/gym_ai2thor/config_files/NL_lookat_bowls_vs_cups_fp1_config.json new file mode 100644 index 0000000..fdb3dbc --- /dev/null +++ b/gym_ai2thor/config_files/NL_lookat_bowls_vs_cups_fp1_config.json @@ -0,0 +1,18 @@ +{ + "open_close_interaction": false, + "pickup_put_interaction": false, + "lookupdown_actions": true, + "max_episode_length": 1000, + "movement_reward": -0.001, + "scene_id": "FloorPlan1", + "grayscale": true, + "resolution": [128, 128], + "cameraY": -0.8, + "gridSize": 0.05, + "continuous_movement": true, + "build_file_name": "build_bowls_vs_cups_fp1_v_0.1.x86_64", + "task": { + "task_name": "NaturalLanguageLookAtObjectTask", + "list_of_instructions": ["Bowl", "Mug"] + } +} diff --git a/gym_ai2thor/config_files/NL_lookat_bowls_vs_cups_rotate_only_fp1_config.json b/gym_ai2thor/config_files/NL_lookat_bowls_vs_cups_rotate_only_fp1_config.json new file mode 100644 index 0000000..05811ac --- /dev/null +++ b/gym_ai2thor/config_files/NL_lookat_bowls_vs_cups_rotate_only_fp1_config.json @@ -0,0 +1,20 @@ +{ + "open_close_interaction": false, + "pickup_put_interaction": false, + "lookupdown_actions": false, + "moveupdown_actions": false, + "max_episode_length": 1000, + "movement_reward": -0.001, + "scene_id": "FloorPlan2", + "grayscale": true, + "resolution": [128, 128], + "cameraY": -0.8, + "gridSize": 0.05, + "incremental_rotation": true, 
+ "build_file_name": "bowls_vs_cups_rotate_only.x86_64", + "task": { + "task_name": "NaturalLanguageLookAtObjectTask", + "list_of_instructions": ["go to the Bowl", "please navigate to my Mug"], + "distance_threshold_3d": 3.0 + } +} diff --git a/gym_ai2thor/config_files/NL_pickup_3_objects_surround_fp_211_v0.1.json b/gym_ai2thor/config_files/NL_pickup_3_objects_surround_fp_211_v0.1.json new file mode 100644 index 0000000..3221737 --- /dev/null +++ b/gym_ai2thor/config_files/NL_pickup_3_objects_surround_fp_211_v0.1.json @@ -0,0 +1,23 @@ +{ + "open_close_interaction": false, + "pickup_put_interaction": true, + "put_interaction": false, + "lookupdown_actions": false, + "continuous_movement": true, + "pickup_objects": ["RemoteControl", "TissueBoxEmpty", "Book"], + "max_episode_length": 2000, + "movement_reward": -0.001, + "scene_id": "FloorPlan211", + "grayscale": false, + "resolution": [128, 128], + "cameraY": -0.8, + "gridSize": 0.05, + "build_file_name": "NL_pickup_5_objects_surround_fp_211_v0.1.x86_64", + "task": { + "task_name": "NaturalLanguagePickUpObjectTask", + "list_of_instructions": ["RemoteControl", "TissueBoxEmpty", "Book"], + "max_object_pickup_crosshair_dist": 1.2, + "max_object_pickup_euclidean_dist": 0.55, + "default_reward": 1 + } +} diff --git a/gym_ai2thor/config_files/NL_pickup_5_objects_surround_fp_211_v0.1.json b/gym_ai2thor/config_files/NL_pickup_5_objects_surround_fp_211_v0.1.json new file mode 100644 index 0000000..c590481 --- /dev/null +++ b/gym_ai2thor/config_files/NL_pickup_5_objects_surround_fp_211_v0.1.json @@ -0,0 +1,23 @@ +{ + "open_close_interaction": false, + "pickup_put_interaction": true, + "put_interaction": false, + "lookupdown_actions": false, + "continuous_movement": true, + "pickup_objects": ["Newspaper", "RemoteControl", "CellPhone", "TissueBoxEmpty", "Book"], + "max_episode_length": 2000, + "movement_reward": -0.001, + "scene_id": "FloorPlan211", + "grayscale": false, + "resolution": [128, 128], + "cameraY": -0.8, + 
"gridSize": 0.05, + "build_file_name": "NL_pickup_5_objects_surround_fp_211_v0.1.x86_64", + "task": { + "task_name": "NaturalLanguagePickUpObjectTask", + "list_of_instructions": ["Newspaper", "RemoteControl", "CellPhone", "TissueBoxEmpty", "Book"], + "max_object_pickup_crosshair_dist": 1.2, + "max_object_pickup_euclidean_dist": 0.55, + "default_reward": 1 + } +} diff --git a/gym_ai2thor/config_files/NL_pickup_bowls_vs_cups_fp1_201_301_401_config.json b/gym_ai2thor/config_files/NL_pickup_bowls_vs_cups_fp1_201_301_401_config.json new file mode 100644 index 0000000..39ed3b1 --- /dev/null +++ b/gym_ai2thor/config_files/NL_pickup_bowls_vs_cups_fp1_201_301_401_config.json @@ -0,0 +1,30 @@ +{ + "open_close_interaction": false, + "pickup_put_interaction": true, + "put_interaction": false, + "lookupdown_actions": false, + "continuous_movement": true, + "pickup_objects": [ + "Bowl", + "Mug" + ], + "acceptable_receptacles": [ + "CounterTop", + "TableTop", + "Sink" + ], + "max_episode_length": 1000, + "movement_reward": -0.001, + "grayscale": false, + "resolution": [128, 128], + "cameraY": -0.8, + "gridSize": 0.05, + "build_file_name": "build_bowls_vs_cups_fp1_201_301_401_v_0.1.x86_64", + "task": { + "task_name": "NaturalLanguagePickUpObjectTask", + "list_of_instructions": ["Bowl", "Mug"], + "max_object_pickup_crosshair_dist": 1.2, + "max_object_pickup_euclidean_dist": 0.6, + "random_scene_ids_on_reset": ["FloorPlan1", "FloorPlan201", "FloorPlan301", "FloorPlan401"] + } +} diff --git a/gym_ai2thor/config_files/NL_pickup_bowls_vs_cups_fp1_config.json b/gym_ai2thor/config_files/NL_pickup_bowls_vs_cups_fp1_config.json new file mode 100644 index 0000000..a57839d --- /dev/null +++ b/gym_ai2thor/config_files/NL_pickup_bowls_vs_cups_fp1_config.json @@ -0,0 +1,30 @@ +{ + "open_close_interaction": false, + "pickup_put_interaction": true, + "put_interaction": false, + "lookupdown_actions": false, + "continuous_movement": true, + "pickup_objects": [ + "Bowl", + "Mug" + ], + 
"acceptable_receptacles": [ + "CounterTop", + "TableTop", + "Sink" + ], + "max_episode_length": 1000, + "movement_reward": -0.001, + "scene_id": "FloorPlan1", + "grayscale": true, + "resolution": [128, 128], + "cameraY": -0.8, + "gridSize": 0.05, + "build_file_name": "build_bowls_vs_cups_fp1_v_0.1.x86_64", + "task": { + "task_name": "NaturalLanguagePickUpObjectTask", + "list_of_instructions": ["Bowl", "Mug"], + "max_object_pickup_crosshair_dist": 1.2, + "max_object_pickup_euclidean_dist": 0.55 + } +} diff --git a/gym_ai2thor/config_files/NL_pickup_multiple_bowls_vs_cups_fp210_v0.1.json b/gym_ai2thor/config_files/NL_pickup_multiple_bowls_vs_cups_fp210_v0.1.json new file mode 100644 index 0000000..088a1e4 --- /dev/null +++ b/gym_ai2thor/config_files/NL_pickup_multiple_bowls_vs_cups_fp210_v0.1.json @@ -0,0 +1,25 @@ +{ + "open_close_interaction": false, + "pickup_put_interaction": true, + "put_interaction": false, + "lookupdown_actions": false, + "continuous_movement": true, + "pickup_objects": [ + "Mug", + "Bowl" + ], + "max_episode_length": 2000, + "movement_reward": -0.001, + "scene_id": "FloorPlan210", + "grayscale": false, + "resolution": [128, 128], + "cameraY": -0.8, + "gridSize": 0.05, + "build_file_name": "NL_pickup_multiple_bowls_vs_cups_fp210_v0.1.x86_64", + "task": { + "task_name": "NaturalLanguagePickUpMultipleObjectTask", + "list_of_instructions": ["Mug", "Bowl"], + "max_object_pickup_crosshair_dist": 1.1, + "max_object_pickup_euclidean_dist": 0.5 + } +} diff --git a/gym_ai2thor/config_files/NL_pickup_multiple_cups_only_fp404_v0.1.json b/gym_ai2thor/config_files/NL_pickup_multiple_cups_only_fp404_v0.1.json new file mode 100644 index 0000000..eb44f2b --- /dev/null +++ b/gym_ai2thor/config_files/NL_pickup_multiple_cups_only_fp404_v0.1.json @@ -0,0 +1,24 @@ +{ + "open_close_interaction": false, + "pickup_put_interaction": true, + "put_interaction": false, + "lookupdown_actions": false, + "continuous_movement": true, + "pickup_objects": [ + "Mug" + ], + 
"max_episode_length": 1000, + "movement_reward": -0.001, + "scene_id": "FloorPlan404", + "grayscale": false, + "resolution": [128, 128], + "cameraY": -0.8, + "gridSize": 0.05, + "build_file_name": "build_cups_only_pickup_multiple_fp404_v0.1.x86_64", + "task": { + "task_name": "NaturalLanguagePickUpMultipleObjectTask", + "list_of_instructions": ["Mug"], + "max_object_pickup_crosshair_dist": 1.2, + "max_object_pickup_euclidean_dist": 0.55 + } +} diff --git a/gym_ai2thor/config_files/config_example.json b/gym_ai2thor/config_files/default_config.json similarity index 66% rename from gym_ai2thor/config_files/config_example.json rename to gym_ai2thor/config_files/default_config.json index 23fc3d1..5937960 100644 --- a/gym_ai2thor/config_files/config_example.json +++ b/gym_ai2thor/config_files/default_config.json @@ -1,7 +1,7 @@ { - "open_close_interaction": true, "pickup_put_interaction": true, + "lookupdown_actions": true, "pickup_objects": [ "Mug" ], @@ -13,11 +13,16 @@ "openable_objects": [ "Microwave" ], + "max_episode_length": 1000, "scene_id": "FloorPlan28", "grayscale": true, "resolution": [128, 128], + "cameraY": 0.0, + "gridSize": 0.1, + "continuous_movement": false, + "build_file_name": false, "task": { - "task_name": "PickUp", + "task_name": "PickUpTask", "target_object": "Mug" } } diff --git a/gym_ai2thor/envs/ai2thor_env.py b/gym_ai2thor/envs/ai2thor_env.py index 0a8c586..f606082 100644 --- a/gym_ai2thor/envs/ai2thor_env.py +++ b/gym_ai2thor/envs/ai2thor_env.py @@ -2,17 +2,21 @@ Base class implementation for ai2thor environments wrapper, which adds an openAI gym interface for inheriting the predefined methods and can be extended for particular tasks. 
""" +import random +import os +import warnings + import ai2thor.controller import numpy as np from skimage import transform -from copy import deepcopy - import gym from gym import error, spaces from gym.utils import seeding + from gym_ai2thor.image_processing import rgb2gray from gym_ai2thor.utils import read_config from gym_ai2thor.tasks import TaskFactory +from gym_ai2thor.task_utils import calculate_euc_distance_between_agent_and_object ALL_POSSIBLE_ACTIONS = [ 'MoveAhead', @@ -27,35 +31,38 @@ 'CloseObject', 'PickupObject', 'PutObject' + # Rotate is also possible when continuous_movement == True but we don't list it here # Teleport and TeleportFull but these shouldn't be allowable actions for an agent ] class AI2ThorEnv(gym.Env): """ - Wrapper base class + This class wraps the ai2thor environment """ - def __init__(self, seed=None, config_file='config_files/config_example.json', config_dict=None): + def __init__(self, config_file='config_files/default_config.json', config_dict=None): """ - :param seed: (int) Random seed :param config_file: (str) Path to environment configuration file. Either absolute or relative path to the root of this repository. + default and good example at: config_files/config_example.json :param: config_dict: (dict) Overrides specific fields from the input configuration file. """ # Loads config settings from file + print('Reading in base config file at: {}'.format(config_file)) self.config = read_config(config_file, config_dict) - self.scene_id = self.config['scene_id'] # Randomness settings self.np_random = None - if seed: - self.seed(seed) + self.num_random_actions_at_init = self.config.get('num_random_actions_at_init', 0) # Object settings # acceptable objects taken from config file. 
- if self.config['pickup_put_interaction'] or \ - self.config['open_close_interaction']: - self.objects = {'pickupables': self.config['pickup_objects'], - 'receptacles': self.config['acceptable_receptacles'], - 'openables': self.config['openable_objects']} + self.allowed_objects = {} + if self.config['pickup_put_interaction']: + self.allowed_objects['pickupables'] = self.config['pickup_objects'] + self.allowed_objects['receptacles'] = self.config.get('acceptable_receptacles', []) + if not self.allowed_objects['receptacles']: + warnings.warn('list of receptacles is empty or wasn\'t given') + if self.config['open_close_interaction']: + self.allowed_objects['openables'] = self.config['openable_objects'] # Action settings self.action_names = tuple(ALL_POSSIBLE_ACTIONS.copy()) # remove open/close and pickup/put actions if respective interaction bool is set to False @@ -66,7 +73,22 @@ def __init__(self, seed=None, config_file='config_files/config_example.json', co if not self.config['pickup_put_interaction']: self.action_names = tuple([action_name for action_name in self.action_names if 'Pickup' not in action_name and 'Put' not in action_name]) + if self.config.get('put_interaction'): + self.action_names = tuple([action_name for action_name in self.action_names + if 'Put' not in action_name]) + if not self.config.get('lookupdown_actions', True): + self.action_names = tuple([action_name for action_name in self.action_names + if 'Look' not in action_name]) + if not self.config.get('moveupdown_actions', True): + self.action_names = tuple([action_name for action_name in self.action_names + if 'Move' not in action_name]) self.action_space = spaces.Discrete(len(self.action_names)) + # rotation settings + self.continuous_movement = self.config.get('continuous_movement', False) + if self.continuous_movement: + self.absolute_rotation = 0.0 + self.rotation_amount = 10.0 + # Image settings self.event = None channels = 1 if self.config['grayscale'] else 3 @@ -74,81 +96,137 @@ def 
__init__(self, seed=None, config_file='config_files/config_example.json', co shape=(channels, self.config['resolution'][0], self.config['resolution'][1]), dtype=np.uint8) + # ai2thor initialise function settings + self.cameraY = self.config.get('cameraY', 0.0) + self.gridSize = self.config.get('gridSize', 0.1) + # Create task from config self.task = TaskFactory.create_task(self.config) + # set scene_id + if self.task.random_scene_ids_on_reset: + self.scene_id = random.choice(self.task.random_scene_ids_on_reset) + else: + self.scene_id = self.config.get('scene_id') + if not self.scene_id: + raise ValueError('Need to specify scene_id in config') + # todo check for self.config['task'].get('list_of_xyz_starting_positions') + # todo and implement random.choice of starting position for specific task/scene with teleport + # todo precompute all valid locations for all scenes and avoid objects too? # Start ai2thor self.controller = ai2thor.controller.Controller() + if self.config.get('build_file_name'): + # file must be in gym_ai2thor/build_files + self.build_file_path = os.path.abspath(os.path.join(__file__, '../../build_files', + self.config['build_file_name'])) + print('Build file path at: {}'.format(self.build_file_path)) + if not os.path.exists(self.build_file_path): + raise ValueError('Unity build file at:\n{}\n does not exist'.format( + self.build_file_path)) + self.controller.local_executable_path = self.build_file_path self.controller.start() + self.reset_ever = False + def step(self, action, verbose=True): if not self.action_space.contains(action): raise error.InvalidAction('Action must be an integer between ' '0 and {}!'.format(self.action_space.n)) + if not self.reset_ever: + raise ValueError('Cannot step() in environment if it has not been reset() before') action_str = self.action_names[action] visible_objects = [obj for obj in self.event.metadata['objects'] if obj['visible']] + self.event.metadata['lastObjectPut'] = None + 
self.event.metadata['lastObjectPutReceptacle'] = None + self.event.metadata['lastObjectPickedUp'] = None + self.event.metadata['lastObjectOpened'] = None + self.event.metadata['lastObjectClosed'] = None + # if/else statements below for dealing with up to 13 actions if action_str.endswith('Object'): # All interactions end with 'Object' # Interaction actions interaction_obj, distance = None, float('inf') - inventory_before = self.event.metadata['inventoryObjects'][0]['objectType'] \ - if self.event.metadata['inventoryObjects'] else [] - if action_str == 'PutObject': + inventory_before = [x['objectType'] for x in + self.event.metadata['inventoryObjects']] \ + if self.event.metadata['inventoryObjects'] else [] + if action_str.startswith('Put'): closest_receptacle = None for obj in visible_objects: # look for closest receptacle to put object from inventory - if obj['receptacle'] and obj['distance'] < distance \ - and obj['objectType'] in self.objects['receptacles'] \ - and len(obj['receptacleObjectIds']) < obj['receptacleCount']: + closest_receptacle_to_put_object_in = obj['receptacle'] and \ + obj['distance'] < distance \ + and obj['objectType'] in self.allowed_objects['receptacles'] \ + and len(obj['receptacleObjectIds']) < obj['receptacleCount'] + if closest_receptacle_to_put_object_in: closest_receptacle = obj distance = closest_receptacle['distance'] if self.event.metadata['inventoryObjects'] and closest_receptacle: interaction_obj = closest_receptacle + object_to_put = self.event.metadata['inventoryObjects'][0] self.event = self.controller.step( dict(action=action_str, - objectId=self.event.metadata['inventoryObjects'][0]['objectId'], + objectId=object_to_put['objectId'], receptacleObjectId=interaction_obj['objectId'])) - elif action_str == 'PickupObject': + self.event.metadata['lastObjectPut'] = object_to_put + self.event.metadata['lastObjectPutReceptacle'] = interaction_obj + elif action_str.startswith('Pickup'): closest_pickupable = None for obj in 
visible_objects: # look for closest object to pick up - if obj['pickupable'] and obj['distance'] < distance and \ - obj['objectType'] in self.objects['pickupables']: - closest_pickupable = obj - if closest_pickupable and not self.event.metadata['inventoryObjects']: + closest_object_to_pick_up = obj['pickupable'] and \ + obj['distance'] < distance and \ + obj['distance'] < self.task.max_object_pickup_crosshair_dist and \ + obj['objectType'] in self.allowed_objects['pickupables'] + if closest_object_to_pick_up: + if self.task.max_object_pickup_euclidean_dist: + euc_distance_to_obj = calculate_euc_distance_between_agent_and_object( + self.event.metadata['agent'], obj) + if euc_distance_to_obj < self.task.max_object_pickup_euclidean_dist: + closest_pickupable = obj + else: + closest_pickupable = obj + if closest_pickupable: # and not self.event.metadata['inventoryObjects']: # todo option to check for this? + interaction_obj = closest_pickupable + self.event = self.controller.step( + dict(action=action_str, objectId=interaction_obj['objectId'])) - elif action_str == 'OpenObject': + self.event.metadata['lastObjectPickedUp'] = interaction_obj + elif action_str.startswith('Open'): closest_openable = None for obj in visible_objects: # look for closest closed receptacle to open it - if obj['openable'] and obj['distance'] < distance and not obj['isopen'] and \ - obj['objectType'] in self.objects['openables']: + is_closest_closed_receptacle = obj['openable'] and \ + obj['distance'] < distance and not obj['isopen'] and \ + obj['objectType'] in self.allowed_objects['openables'] + if is_closest_closed_receptacle: closest_openable = obj distance = closest_openable['distance'] - if closest_openable: - interaction_obj = closest_openable - self.event = self.controller.step( - dict(action=action_str, - objectId=interaction_obj['objectId'])) - elif action_str == 'CloseObject': + if closest_openable: + interaction_obj = closest_openable + self.event = self.controller.step( +
dict(action=action_str, objectId=interaction_obj['objectId'])) + self.event.metadata['lastObjectOpened'] = interaction_obj + elif action_str.startswith('Close'): closest_openable = None for obj in visible_objects: # look for closest opened receptacle to close it - if obj['openable'] and obj['distance'] < distance and obj['isopen'] and \ - obj['objectType'] in self.objects['openables']: + is_closest_open_receptacle = obj['openable'] and obj['distance'] < distance \ + and obj['isopen'] and \ + obj['objectType'] in self.allowed_objects[ + 'openables'] + if is_closest_open_receptacle: closest_openable = obj distance = closest_openable['distance'] - if closest_openable: - interaction_obj = closest_openable - self.event = self.controller.step( - dict(action=action_str, - objectId=interaction_obj['objectId'])) + if closest_openable: + interaction_obj = closest_openable + self.event = self.controller.step( + dict(action=action_str, objectId=interaction_obj['objectId'])) + self.event.metadata['lastObjectClosed'] = interaction_obj else: raise error.InvalidAction('Invalid interaction {}'.format(action_str)) + # print what object was interacted with and state of inventory if interaction_obj and verbose: - inventory_after = self.event.metadata['inventoryObjects'][0]['objectType'] \ - if self.event.metadata['inventoryObjects'] else [] + inventory_after = [x['objectType'] for x in + self.event.metadata['inventoryObjects']] \ + if self.event.metadata['inventoryObjects'] else [] if action_str in ['PutObject', 'PickupObject']: inventory_changed_str = 'Inventory before/after: {}/{}.'.format( inventory_before, inventory_after) @@ -156,16 +234,36 @@ def step(self, action, verbose=True): inventory_changed_str = '' print('{}: {}. 
{}'.format( action_str, interaction_obj['objectType'], inventory_changed_str)) + elif action_str.startswith('Rotate'): + if self.continuous_movement: + # Rotate actions + if action_str.endswith('Left'): + self.absolute_rotation -= self.rotation_amount + self.event = self.controller.step( + dict(action='Rotate', rotation=self.absolute_rotation)) + elif action_str.endswith('Right'): + self.absolute_rotation += self.rotation_amount + self.event = self.controller.step( + dict(action='Rotate', rotation=self.absolute_rotation)) + else: + # Do normal RotateLeft/Right command in discrete mode (i.e. 3D GridWorld) + self.event = self.controller.step(dict(action=action_str)) else: - # Move, Look or Rotate actions + # Move and Look actions self.event = self.controller.step(dict(action=action_str)) self.task.step_num += 1 - state_image = self.preprocess(self.event.frame) + + # todo if frame_segmentation_on: return segmented image. OR could add it as a channel? + # todo create 50 random room tasks with segmentation on since it should be easier + image_state = self.preprocess(self.event.frame) + extra_state = self.task.get_extra_state() + state = (image_state, extra_state) if extra_state else image_state + reward, done = self.task.transition_reward(self.event) - info = {} - return state_image, reward, done, info + info = {} + return state, reward, done, info def preprocess(self, img): """ @@ -175,16 +273,39 @@ def preprocess(self, img): img = img.astype(np.float32) if self.observation_space.shape[0] == 1: img = rgb2gray(img) # todo cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + img = np.moveaxis(img, 2, 0) return img def reset(self): print('Resetting environment and starting new episode') + self.reset_ever = True + if self.task.random_scene_ids_on_reset: + self.scene_id = random.choice(self.task.random_scene_ids_on_reset) + print('Changing to scene_id: {}'.format(self.scene_id)) self.controller.reset(self.scene_id) - self.event = self.controller.step(dict(action='Initialize', 
gridSize=0.25, - renderDepthImage=True, renderClassImage=True, - renderObjectImage=True)) - self.task.reset() - state = self.preprocess(self.event.frame) + self.event = self.controller.step(dict(action='Initialize', gridSize=self.gridSize, + cameraY=self.cameraY, renderDepthImage=True, + renderClassImage=True, renderObjectImage=True, + continuous=self.continuous_movement)) + self.task.step_num = 0 # needed so done returns False below. task.reset() also does this + + if self.num_random_actions_at_init > 0: + for i in range(self.num_random_actions_at_init): + action = self.action_space.sample() + _, _, done, _ = self.step(action) + if done: + return self.reset() # if we end episode in random actions, reset again + + if self.continuous_movement: + self.absolute_rotation = 0.0 + extra_state = self.task.reset() + image_state = self.preprocess(self.event.frame) + + if extra_state: + state = (image_state, extra_state) + else: + state = image_state + return state def render(self, mode='human'): @@ -192,6 +313,7 @@ def render(self, mode='human'): def seed(self, seed=None): self.np_random, seed1 = seeding.np_random(seed) + random.seed(seed) # Derive a random seed. This gets passed as a uint, but gets # checked as an int elsewhere, so we need to keep it below # 2**31. @@ -199,7 +321,3 @@ def seed(self, seed=None): def close(self): self.controller.stop() - - -if __name__ == '__main__': - AI2ThorEnv() diff --git a/gym_ai2thor/image_processing.py b/gym_ai2thor/image_processing.py index 9772586..d208276 100644 --- a/gym_ai2thor/image_processing.py +++ b/gym_ai2thor/image_processing.py @@ -9,4 +9,4 @@ def rgb2gray(rgb): RGB image transformation to Luma component assuming BT.601 color. 
https://en.wikipedia.org/wiki/Luma_(video) """ - return np.dot(rgb[..., :3], [0.299, 0.587, 0.114]) + return np.expand_dims(np.dot(rgb[..., :3], [0.299, 0.587, 0.114]), axis=2) diff --git a/gym_ai2thor/sample_event_metadata.json b/gym_ai2thor/sample_event_metadata.json new file mode 100644 index 0000000..3115dda --- /dev/null +++ b/gym_ai2thor/sample_event_metadata.json @@ -0,0 +1,1398 @@ +{"agent": {"bounds3D": [], + "cameraHorizon": 0.0, + "distance": 0.0, + "isopen": False, + "name": "agent", + "objectId": "", + "objectType": "", + "openable": False, + "parentReceptacle": "", + "pickupable": False, + "pivotSimObjs": [], + "position": {"x": -1.0, "y": 0.9799990057945251, "z": -1.75}, + "receptacle": False, + "receptacleCount": 0, + "receptacleObjectIds": [], + "rotation": {"x": 0.0, "y": 180.0, "z": 0.0}, + "visible": False}, + "agentId": 0, + "collided": False, + "collidedObjects": [], + "colorBounds": [{"bounds": [0, 0, 299, 65], "color": [127, 29, 203]}, + {"bounds": [12, 0, 151, 58], "color": [177, 60, 44]}, + {"bounds": [96, 0, 197, 25], "color": [89, 77, 61]}, + {"bounds": [198, 0, 299, 25], "color": [108, 174, 95]}, + {"bounds": [152, 1, 195, 23], "color": [191, 227, 85]}, + {"bounds": [83, 15, 89, 18], "color": [71, 3, 53]}, + {"bounds": [0, 18, 299, 235], "color": [73, 64, 168]}, + {"bounds": [0, 20, 7, 61], "color": [186, 206, 150]}, + {"bounds": [17, 37, 299, 65], "color": [115, 3, 101]}, + {"bounds": [6, 40, 24, 87], "color": [199, 148, 125]}, + {"bounds": [239, 41, 252, 52], "color": [56, 51, 197]}, + {"bounds": [191, 43, 202, 53], "color": [36, 91, 74]}, + {"bounds": [216, 43, 228, 56], "color": [178, 18, 13]}, + {"bounds": [32, 45, 43, 89], "color": [202, 210, 177]}, + {"bounds": [55, 45, 82, 62], "color": [72, 78, 219]}, + {"bounds": [176, 51, 211, 73], "color": [42, 54, 156]}, + {"bounds": [51, 52, 60, 87], "color": [60, 103, 95]}, + {"bounds": [223, 53, 236, 69], "color": [214, 68, 168]}, + {"bounds": [285, 54, 299, 68], "color": [18, 240, 
113]}, + {"bounds": [18, 55, 92, 100], "color": [110, 250, 103]}, + {"bounds": [135, 55, 167, 69], "color": [135, 13, 200]}, + {"bounds": [263, 55, 271, 69], "color": [127, 73, 96]}, + {"bounds": [67, 56, 75, 95], "color": [226, 66, 148]}, + {"bounds": [247, 57, 260, 71], "color": [20, 107, 195]}, + {"bounds": [207, 58, 221, 90], "color": [118, 102, 24]}, + {"bounds": [261, 58, 278, 83], "color": [219, 74, 174]}, + {"bounds": [117, 59, 127, 74], "color": [44, 186, 193]}, + {"bounds": [227, 59, 241, 91], "color": [182, 114, 149]}, + {"bounds": [76, 61, 78, 77], "color": [202, 218, 132]}, + {"bounds": [90, 61, 99, 84], "color": [122, 156, 16]}, + {"bounds": [281, 62, 295, 87], "color": [216, 69, 22]}, + {"bounds": [181, 76, 187, 79], "color": [34, 152, 164]}, + {"bounds": [150, 80, 237, 213], "color": [92, 62, 94]}, + {"bounds": [113, 81, 125, 88], "color": [17, 67, 188]}, + {"bounds": [0, 107, 136, 296], "color": [216, 148, 75]}, + {"bounds": [0, 112, 33, 291], "color": [104, 199, 254]}, + {"bounds": [40, 122, 109, 256], "color": [181, 237, 187]}, + {"bounds": [10, 236, 299, 299], "color": [55, 223, 207]}, + {"bounds": [56, 246, 94, 260], "color": [143, 211, 227]}, + {"bounds": [0, 270, 28, 299], "color": [135, 101, 149]}], + "colors": [{"color": [58, 205, 56], "name": "Bowl|-00.16|+01.50|-01.45"}, + {"color": [209, 182, 193], "name": "Bowl"}, + {"color": [226, 29, 217], "name": "Container|-00.16|+00.93|-02.94"}, + {"color": [14, 114, 120], "name": "Container"}, + {"color": [219, 14, 164], "name": "Ladel1.001"}, + {"color": [138, 235, 7], "name": "Fridge|-00.22|00.00|-00.83"}, + {"color": [91, 156, 207], "name": "Fridge1"}, + {"color": [181, 237, 187], "name": "Cabinet|-00.35|+01.89|-03.29"}, + {"color": [210, 149, 89], "name": "Drawer"}, + {"color": [237, 189, 33], "name": "StoveBase1"}, + {"color": [216, 148, 75], "name": "Cube.090"}, + {"color": [117, 7, 236], "name": "Toaster|-00.16|+00.93|-01.45"}, + {"color": [55, 33, 114], "name": "Toaster1"}, + {"color": 
[215, 152, 183], "name": "Cabinet|-00.34|+01.89|-01.29"}, + {"color": [44, 186, 193], "name": "Mug|-00.78|+00.93|-03.85"}, + {"color": [8, 94, 186], "name": "CoffeeCup1"}, + {"color": [122, 156, 16], "name": "Bottle5.001"}, + {"color": [116, 220, 170], + "name": "StoveKnob|-00.62|+00.90|-01.98"}, + {"color": [106, 252, 95], "name": "StoveKnob2_Range4"}, + {"color": [41, 198, 116], "name": "Spatula2.001"}, + {"color": [119, 173, 49], "name": "Torus"}, + {"color": [168, 12, 250], "name": "Cabinet|-01.01|+00.39|-03.37"}, + {"color": [61, 44, 125], "name": "Microwave|-00.17|+01.49|-02.06"}, + {"color": [54, 96, 202], "name": "Microwave4"}, + {"color": [240, 130, 222], + "name": "StoveBurner|-00.23|+00.93|-01.85"}, + {"color": [156, 249, 101], "name": "GasStoveTop_Range1"}, + {"color": [72, 78, 219], "name": "Sphere.010"}, + {"color": [255, 102, 152], + "name": "StoveBurner|-00.42|+00.93|-02.26"}, + {"color": [248, 115, 142], + "name": "StoveBurner|-00.23|+00.93|-02.26"}, + {"color": [135, 13, 200], "name": "TurkeyPan.005"}, + {"color": [45, 75, 161], "name": "Cabinet|-00.34|+02.11|-01.27"}, + {"color": [92, 3, 233], "name": "Spatula1.002"}, + {"color": [96, 50, 133], "name": "Towl1 (1)"}, + {"color": [143, 211, 227], "name": "Cylinder.028"}, + {"color": [108, 174, 95], "name": "Cube.085"}, + {"color": [34, 152, 164], "name": "SugarJar.005"}, + {"color": [96, 48, 36], "name": "Cabinet|-00.48|+00.78|-02.74"}, + {"color": [131, 29, 70], "name": "Ladel3.001"}, + {"color": [55, 223, 207], "name": "Ceiling"}, + {"color": [102, 49, 87], "name": "Knife|-00.14|+01.12|-02.75"}, + {"color": [211, 157, 122], "name": "Knife1"}, + {"color": [177, 60, 44], "name": "Cube.100"}, + {"color": [114, 84, 146], "name": "StoveKnob|-00.62|+00.90|-02.13"}, + {"color": [60, 103, 95], "name": "Bottle3.001"}, + {"color": [186, 206, 150], "name": "PaperRoll1"}, + {"color": [164, 253, 150], "name": "Sphere.012"}, + {"color": [77, 4, 136], "name": "Spatula1.001"}, + {"color": [135, 101, 149], 
"name": "TurkeyPan.006"}, + {"color": [237, 39, 71], "name": "Decals.002"}, + {"color": [226, 66, 148], "name": "Bottle4.001"}, + {"color": [246, 16, 151], "name": "StoveKnob|-00.62|+00.90|-01.83"}, + {"color": [36, 91, 74], "name": "Tomato|-01.32|+00.93|-03.53"}, + {"color": [119, 189, 121], "name": "Tomato"}, + {"color": [193, 44, 202], "name": "Cabinet|-00.63|+00.39|-03.01"}, + {"color": [118, 102, 24], "name": "SugarJar.004"}, + {"color": [92, 62, 94], "name": "VenetianFrame"}, + {"color": [14, 97, 183], "name": "Towl1"}, + {"color": [87, 195, 41], "name": "GarbageCan|-00.36|00.00|-00.21"}, + {"color": [225, 40, 55], "name": "GarbageCan"}, + {"color": [110, 132, 248], + "name": "CoffeeMachine|-02.65|+00.93|-03.57"}, + {"color": [147, 71, 238], "name": "CoffeeMachine2"}, + {"color": [214, 15, 78], "name": "Floor"}, + {"color": [73, 64, 168], "name": "Room"}, + {"color": [89, 77, 61], "name": "Cube.086"}, + {"color": [127, 29, 203], "name": "Cube.082"}, + {"color": [97, 134, 44], "name": "StoveTopDoor1"}, + {"color": [140, 135, 166], "name": "Fork|-00.48|+00.81|-02.74"}, + {"color": [54, 200, 25], "name": "Fork1"}, + {"color": [185, 225, 171], + "name": "StoveKnob|-00.62|+00.90|-02.29"}, + {"color": [91, 94, 10], "name": "Egg|-00.21|+00.27|-00.83"}, + {"color": [240, 75, 163], "name": "Egg"}, + {"color": [162, 203, 153], "name": "Mug|-00.53|+00.93|-01.58"}, + {"color": [1, 209, 145], "name": "Cabinet|-00.34|+02.11|-01.63"}, + {"color": [104, 199, 254], "name": "Cabinet|-00.33|+01.89|-03.24"}, + {"color": [29, 84, 249], "name": "Spoon|-00.50|+00.78|-01.45"}, + {"color": [235, 57, 90], "name": "Spoon"}, + {"color": [115, 3, 101], "name": "Decals.003"}, + {"color": [71, 3, 53], "name": "Sphere.008"}, + {"color": [191, 227, 85], "name": "Cabinet|-01.15|+00.78|-03.50"}, + {"color": [238, 221, 39], "name": "Cabinet|-00.33|+01.89|-02.51"}, + {"color": [18, 240, 113], "name": "SugarFill.006"}, + {"color": [36, 61, 25], "name": "Cabinet|-00.34|+02.11|-02.50"}, + {"color": 
[214, 68, 168], "name": "Mug|-01.63|+00.92|-03.74"}, + {"color": [17, 67, 188], "name": "Outlet (1)"}, + {"color": [66, 225, 0], "name": "ButterKnife|-00.43|+00.93|-02.60"}, + {"color": [135, 147, 55], "name": "butterKnife"}, + {"color": [115, 78, 181], "name": "StoveTopGas"}, + {"color": [182, 114, 149], "name": "SugarJar.001"}, + {"color": [139, 56, 140], "name": "StoveBottomDoor1"}, + {"color": [202, 218, 132], "name": "Cube.109"}, + {"color": [178, 18, 13], "name": "Apple|-01.49|+00.93|-03.50"}, + {"color": [159, 98, 144], "name": "Apple"}, + {"color": [20, 107, 195], "name": "SugarFill.001"}, + {"color": [193, 221, 101], "name": "Plate|-00.15|+01.49|-02.73"}, + {"color": [188, 154, 128], "name": "Plate"}, + {"color": [55, 176, 84], "name": "Cabinet|-00.63|+00.39|-01.61"}, + {"color": [145, 107, 85], "name": "Cabinet|-00.34|+02.11|-00.39"}, + {"color": [138, 185, 132], "name": "SugarJar.003"}, + {"color": [202, 210, 177], "name": "Bottle2.001"}, + {"color": [141, 139, 54], "name": "Cabinet|-00.63|+00.39|-02.51"}, + {"color": [96, 140, 59], "name": "Chair|-02.35|00.00|-03.60"}, + {"color": [166, 13, 176], "name": "Chair5"}, + {"color": [199, 148, 125], "name": "Bottle1.001"}, + {"color": [34, 126, 70], "name": "ladel2.001"}, + {"color": [48, 42, 241], "name": "SugarJar.006"}, + {"color": [127, 73, 96], "name": "SugarFill.004"}, + {"color": [219, 74, 174], "name": "Sugar.001"}, + {"color": [216, 69, 22], "name": "SugarJar.002"}, + {"color": [31, 88, 95], "name": "StoveBurner|-00.42|+00.93|-01.85"}, + {"color": [193, 143, 140], "name": "Outlet"}, + {"color": [97, 114, 178], "name": "Sphere.001"}, + {"color": [56, 51, 197], "name": "Potato|-01.63|+00.93|-03.48"}, + {"color": [187, 142, 9], "name": "Potato"}, + {"color": [42, 54, 156], "name": "Bread|-01.33|+00.93|-03.71"}, + {"color": [18, 150, 252], "name": "Bread"}, + {"color": [195, 218, 223], "name": "Cabinet|-00.50|+00.78|-01.45"}, + {"color": [34, 130, 237], "name": "Pot|-00.47|+00.08|-02.74"}, + {"color": 
[132, 237, 87], "name": "Pot1"}, + {"color": [110, 250, 103], "name": "Bottles.001"}, + {"color": [4, 93, 193], "name": "Lettuce|-00.33|+00.74|-00.69"}, + {"color": [203, 156, 88], "name": "Lettuce1"}, + {"color": [241, 134, 252], "name": "Baseboard.020"}, + {"color": [127, 127, 189], "name": "Pan|-00.68|+00.08|-03.27"}, + {"color": [246, 212, 161], "name": "Pan1"}, + {"color": [207, 119, 70], "name": "Spatula3.001"}], + "errorCode": "", + "errorMessage": "", + "inventoryObjects": [], + "lastAction": "MoveAhead", + "lastActionSuccess": True, + "objects": [{"bounds3D": [-2.5750010013580322, + 0.8563164472579956, + -3.647000312805176, + -1.5749990940093994, + 0.9563164710998535, + -3.3069992065429688], + "cameraHorizon": 0.0, + "distance": 2.159883975982666, + "isopen": False, + "name": "Tabletop", + "objectId": "TableTop|-02.08|+00.94|-03.62", + "objectType": "TableTop", + "openable": False, + "parentReceptacle": "", + "pickupable": False, + "pivotSimObjs": [], + "position": {"x": -2.075000047683716, + "y": 0.9433164596557617, + "z": -3.622999906539917}, + "receptacle": True, + "receptacleCount": 4, + "receptacleObjectIds": [], + "rotation": {"x": 0.0, "y": 90.0, "z": 0.0}, + "visible": False}, + {"bounds3D": [-0.2521742284297943, + 1.4949759244918823, + -2.831829071044922, + -0.05024271458387375, + 1.5067294836044312, + -2.6298975944519043], + "cameraHorizon": 0.0, + "distance": 1.3955355882644653, + "isopen": False, + "name": "Plate", + "objectId": "Plate|-00.15|+01.49|-02.73", + "objectType": "Plate", + "openable": False, + "parentReceptacle": "Cabinet|-00.33|+01.89|-02.51", + "pickupable": True, + "pivotSimObjs": [], + "position": {"x": -0.15120847523212433, + "y": 1.494760513305664, + "z": -2.730863332748413}, + "receptacle": False, + "receptacleCount": 0, + "receptacleObjectIds": [], + "rotation": {"x": -1.0245284101983998e-05, "y": 0.0, "z": 0.0}, + "visible": False}, + {"bounds3D": [-0.6563448309898376, + 0.8580825328826904, + -2.015467643737793, + 
-0.576196014881134, + 0.9382582902908325, + -1.9353333711624146], + "cameraHorizon": 0.0, + "distance": 0.45102375745773315, + "isopen": False, + "name": "StoveKnob2_Range2", + "objectId": "StoveKnob|-00.62|+00.90|-01.98", + "objectType": "StoveKnob", + "openable": False, + "parentReceptacle": "", + "pickupable": False, + "pivotSimObjs": [], + "position": {"x": -0.6176999807357788, + "y": 0.8996000289916992, + "z": -1.9753999710083008}, + "receptacle": False, + "receptacleCount": 0, + "receptacleObjectIds": [], + "rotation": {"x": 315.0, + "y": 89.97400665283203, + "z": 180.03199768066406}, + "visible": False}, + {"bounds3D": [-1.3614451885223389, + 0.9283196926116943, + -3.5663928985595703, + -1.2814817428588867, + 0.9905622005462646, + -3.486574649810791], + "cameraHorizon": 0.0, + "distance": 1.805967092514038, + "isopen": False, + "name": "Tomato", + "objectId": "Tomato|-01.32|+00.93|-03.53", + "objectType": "Tomato", + "openable": False, + "parentReceptacle": "", + "pickupable": True, + "pivotSimObjs": [], + "position": {"x": -1.3221999406814575, + "y": 0.9303702116012573, + "z": -3.5262999534606934}, + "receptacle": False, + "receptacleCount": 0, + "receptacleObjectIds": [], + "rotation": {"x": 0.0, "y": 0.0, "z": 0.0}, + "visible": False}, + {"bounds3D": [-0.7945087552070618, + 0.07984550297260284, + -3.400216579437256, + -0.5677620768547058, + 0.12984557449817657, + -3.1494078636169434], + "cameraHorizon": 0.0, + "distance": 1.79671049118042, + "isopen": False, + "name": "Pan1", + "objectId": "Pan|-00.68|+00.08|-03.27", + "objectType": "Pan", + "openable": False, + "parentReceptacle": "Cabinet|-00.63|+00.39|-03.01", + "pickupable": False, + "pivotSimObjs": [], + "position": {"x": -0.6810178160667419, + "y": 0.08484554290771484, + "z": -3.274834156036377}, + "receptacle": True, + "receptacleCount": 1, + "receptacleObjectIds": [], + "rotation": {"x": -6.1288878896448296e-06, + "y": 280.44842529296875, + "z": 1.398907170369057e-05}, + "visible": False}, + 
{"bounds3D": [-0.21095620095729828, + 0.9303669929504395, + -2.992823362350464, + -0.09956331551074982, + 1.1846275329589844, + -2.8814303874969482], + "cameraHorizon": 0.0, + "distance": 1.4578475952148438, + "isopen": False, + "name": "Container", + "objectId": "Container|-00.16|+00.93|-02.94", + "objectType": "Container", + "openable": False, + "parentReceptacle": "", + "pickupable": True, + "pivotSimObjs": [], + "position": {"x": -0.15525996685028076, + "y": 0.9303703308105469, + "z": -2.937127113342285}, + "receptacle": False, + "receptacleCount": 0, + "receptacleObjectIds": [], + "rotation": {"x": 0.0, "y": 0.0, "z": 0.0}, + "visible": False}, + {"bounds3D": [-0.40836191177368164, + 0.14085793495178223, + -1.15748929977417, + 0.030406057834625244, + 1.7145073413848877, + -0.5005106925964355], + "cameraHorizon": 0.0, + "distance": 1.5538111925125122, + "isopen": False, + "name": "Fridge1", + "objectId": "Fridge|-00.22|00.00|-00.83", + "objectType": "Fridge", + "openable": True, + "parentReceptacle": "", + "pickupable": False, + "pivotSimObjs": [{"objectId": "Egg|-00.21|+00.27|-00.83", + "pivotId": 0}, + {"objectId": "Lettuce|-00.33|+00.74|-00.69", + "pivotId": 1}], + "position": {"x": -0.22300000488758087, + "y": -0.0010000000474974513, + "z": -0.8289999961853027}, + "receptacle": True, + "receptacleCount": 6, + "receptacleObjectIds": ["Egg|-00.21|+00.27|-00.83", + "Lettuce|-00.33|+00.74|-00.69"], + "rotation": {"x": 0.0, "y": 270.0, "z": 0.0}, + "visible": False}, + {"bounds3D": [-0.6255507469177246, + 0.8067288994789124, + -2.7551281452178955, + -0.38278937339782715, + 0.826447069644928, + -2.7230093479156494], + "cameraHorizon": 0.0, + "distance": 1.1287176609039307, + "isopen": False, + "name": "Fork1", + "objectId": "Fork|-00.48|+00.81|-02.74", + "objectType": "Fork", + "openable": False, + "parentReceptacle": "Cabinet|-00.48|+00.78|-02.74", + "pickupable": True, + "pivotSimObjs": [], + "position": {"x": -0.48289254307746887, + "y": 0.8116353750228882, + 
"z": -2.7390687465667725}, + "receptacle": False, + "receptacleCount": 0, + "receptacleObjectIds": [], + "rotation": {"x": -1.0245284101983998e-05, "y": 0.0, "z": 0.0}, + "visible": False}, + {"bounds3D": [-0.553860604763031, + 0.2711416482925415, + -0.4028606414794922, + -0.16013938188552856, + 0.6648629307746887, + -0.00913935899734497], + "cameraHorizon": 0.0, + "distance": 1.9385002851486206, + "isopen": False, + "name": "GarbageCan", + "objectId": "GarbageCan|-00.36|00.00|-00.21", + "objectType": "GarbageCan", + "openable": False, + "parentReceptacle": "", + "pickupable": False, + "pivotSimObjs": [], + "position": {"x": -0.3569999933242798, + "y": -3.196139175543067e-08, + "z": -0.20600000023841858}, + "receptacle": True, + "receptacleCount": 1, + "receptacleObjectIds": [], + "rotation": {"x": 0.0, "y": 0.0, "z": 0.0}, + "visible": False}, + {"bounds3D": [-0.8528260588645935, + 0.9309259057044983, + -3.9095852375030518, + -0.714918315410614, + 1.0337982177734375, + -3.7689216136932373], + "cameraHorizon": 0.0, + "distance": 2.112607479095459, + "isopen": False, + "name": "CoffeeCup1", + "objectId": "Mug|-00.78|+00.93|-03.85", + "objectType": "Mug", + "openable": False, + "parentReceptacle": "", + "pickupable": True, + "pivotSimObjs": [], + "position": {"x": -0.7749999761581421, + "y": 0.9301429986953735, + "z": -3.8499999046325684}, + "receptacle": False, + "receptacleCount": 0, + "receptacleObjectIds": [], + "rotation": {"x": 0.0, "y": 50.4573860168457, "z": 0.0}, + "visible": False}, + {"bounds3D": [-0.19851021468639374, + 0.9635931253433228, + -2.7536282539367676, + -0.09219704568386078, + 1.3012911081314087, + -2.7334327697753906], + "cameraHorizon": 0.0, + "distance": 1.3237783908843994, + "isopen": False, + "name": "Knife1", + "objectId": "Knife|-00.14|+01.12|-02.75", + "objectType": "Knife", + "openable": False, + "parentReceptacle": "", + "pickupable": True, + "pivotSimObjs": [], + "position": {"x": -0.14190000295639038, + "y": 1.117300033569336, + 
"z": -2.7486000061035156}, + "receptacle": False, + "receptacleCount": 0, + "receptacleObjectIds": [], + "rotation": {"x": 10.637146949768066, + "y": 274.3685607910156, + "z": 270.0}, + "visible": False}, + {"bounds3D": [-0.5118284225463867, + 0.9333651065826416, + -1.9365284442901611, + -0.3299715518951416, + 0.9572690725326538, + -1.754671573638916], + "cameraHorizon": 0.0, + "distance": 0.5890516042709351, + "isopen": False, + "name": "GasStoveTop_Range1", + "objectId": "StoveBurner|-00.42|+00.93|-01.85", + "objectType": "StoveBurner", + "openable": False, + "parentReceptacle": "", + "pickupable": False, + "pivotSimObjs": [], + "position": {"x": -0.42089998722076416, + "y": 0.9301429986953735, + "z": -1.8456000089645386}, + "receptacle": True, + "receptacleCount": 1, + "receptacleObjectIds": [], + "rotation": {"x": 0.0, "y": 0.0, "z": 0.0}, + "visible": False}, + {"bounds3D": [-0.2595430612564087, + 1.4952101707458496, + -1.5506460666656494, + -0.06338601559400558, + 1.5541222095489502, + -1.3544890880584717], + "cameraHorizon": 0.0, + "distance": 1.0283229351043701, + "isopen": False, + "name": "Bowl", + "objectId": "Bowl|-00.16|+01.50|-01.45", + "objectType": "Bowl", + "openable": False, + "parentReceptacle": "Cabinet|-00.34|+01.89|-01.29", + "pickupable": True, + "pivotSimObjs": [], + "position": {"x": -0.16146452724933624, + "y": 1.495596170425415, + "z": -1.45256769657135}, + "receptacle": False, + "receptacleCount": 0, + "receptacleObjectIds": [], + "rotation": {"x": -1.0245284101983998e-05, "y": 0.0, "z": 0.0}, + "visible": False}, + {"bounds3D": [-0.6566448211669922, + 0.8584824800491333, + -2.3290677070617676, + -0.5764960050582886, + 0.9386582374572754, + -2.2489333152770996], + "cameraHorizon": 0.0, + "distance": 0.6654659509658813, + "isopen": False, + "name": "StoveKnob2_Range4", + "objectId": "StoveKnob|-00.62|+00.90|-02.29", + "objectType": "StoveKnob", + "openable": False, + "parentReceptacle": "", + "pickupable": False, + "pivotSimObjs": [], + 
"position": {"x": -0.6179999709129333, + "y": 0.8999999761581421, + "z": -2.2890000343322754}, + "receptacle": False, + "receptacleCount": 0, + "receptacleObjectIds": [], + "rotation": {"x": 315.0, + "y": 89.97400665283203, + "z": 180.03199768066406}, + "visible": False}, + {"bounds3D": [-0.2558910846710205, + 0.9301429390907288, + -1.6137478351593018, + -0.0713789314031601, + 1.1241569519042969, + -1.2920067310333252], + "cameraHorizon": 0.0, + "distance": 0.8889735341072083, + "isopen": False, + "name": "Toaster1", + "objectId": "Toaster|-00.16|+00.93|-01.45", + "objectType": "Toaster", + "openable": False, + "parentReceptacle": "", + "pickupable": False, + "pivotSimObjs": [], + "position": {"x": -0.1636350154876709, + "y": 0.9301429986953735, + "z": -1.4528772830963135}, + "receptacle": False, + "receptacleCount": 0, + "receptacleObjectIds": [], + "rotation": {"x": 0.0, "y": 0.0, "z": 0.0}, + "visible": False}, + {"bounds3D": [-1.665656328201294, + 0.924782931804657, + -3.7827463150024414, + -1.5564723014831543, + 1.0276552438735962, + -3.6940536499023438], + "cameraHorizon": 0.0, + "distance": 2.0850648880004883, + "isopen": False, + "name": "CoffeeCup1", + "objectId": "Mug|-01.63|+00.92|-03.74", + "objectType": "Mug", + "openable": False, + "parentReceptacle": "", + "pickupable": True, + "pivotSimObjs": [], + "position": {"x": -1.625, + "y": 0.9240000247955322, + "z": -3.7383999824523926}, + "receptacle": False, + "receptacleCount": 0, + "receptacleObjectIds": [], + "rotation": {"x": 0.0, "y": 180.0, "z": 0.0}, + "visible": False}, + {"bounds3D": [-0.29263991117477417, + 1.5244276523590088, + -2.8414499759674072, + -0.16177701950073242, + 2.2490928173065186, + -2.5138638019561768], + "cameraHorizon": 0.0, + "distance": 1.3632137775421143, + "isopen": False, + "name": "Cabinet", + "objectId": "Cabinet|-00.33|+01.89|-02.51", + "objectType": "Cabinet", + "openable": True, + "parentReceptacle": "", + "pickupable": False, + "pivotSimObjs": [{"objectId": 
"Plate|-00.15|+01.49|-02.73", + "pivotId": 0}], + "position": {"x": -0.3272084593772888, + "y": 1.8867602348327637, + "z": -2.5138635635375977}, + "receptacle": True, + "receptacleCount": 1, + "receptacleObjectIds": ["Plate|-00.15|+01.49|-02.73"], + "rotation": {"x": 270.019775390625, "y": 0.0, "z": 0.0}, + "visible": False}, + {"bounds3D": [-0.6222020983695984, + 0.7248871326446533, + -1.614982008934021, + -0.6195090413093567, + 0.8706167936325073, + -1.2865678071975708], + "cameraHorizon": 0.0, + "distance": 0.6155224442481995, + "isopen": False, + "name": "Drawer", + "objectId": "Cabinet|-00.50|+00.78|-01.45", + "objectType": "Cabinet", + "openable": True, + "parentReceptacle": "", + "pickupable": False, + "pivotSimObjs": [{"objectId": "Spoon|-00.50|+00.78|-01.45", + "pivotId": 0}], + "position": {"x": -0.5008437633514404, + "y": 0.7795612812042236, + "z": -1.450774908065796}, + "receptacle": True, + "receptacleCount": 1, + "receptacleObjectIds": ["Spoon|-00.50|+00.78|-01.45"], + "rotation": {"x": 270.019775390625, "y": 0.0, "z": 0.0}, + "visible": False}, + {"bounds3D": [-0.5953136682510376, + 0.09301626682281494, + -1.6149822473526, + -0.4644508361816406, + 0.6846745014190674, + -1.3194092512130737], + "cameraHorizon": 0.0, + "distance": 0.7104082107543945, + "isopen": False, + "name": "Cabinet", + "objectId": "Cabinet|-00.63|+00.39|-01.61", + "objectType": "Cabinet", + "openable": True, + "parentReceptacle": "", + "pickupable": False, + "pivotSimObjs": [], + "position": {"x": -0.6298819780349731, + "y": 0.3888453245162964, + "z": -1.6149822473526}, + "receptacle": True, + "receptacleCount": 1, + "receptacleObjectIds": [], + "rotation": {"x": 270.019775390625, "y": 0.0, "z": 0.0}, + "visible": False}, + {"bounds3D": [-1.2881675958633423, + 0.7248872518539429, + -3.3793442249298096, + -1.0107892751693726, + 0.8706167936325073, + -3.376683473587036], + "cameraHorizon": 0.0, + "distance": 1.7654800415039062, + "isopen": False, + "name": "Drawer", + "objectId": 
"Cabinet|-01.15|+00.78|-03.50", + "objectType": "Cabinet", + "openable": True, + "parentReceptacle": "", + "pickupable": False, + "pivotSimObjs": [], + "position": {"x": -1.1494783163070679, + "y": 0.7825552225112915, + "z": -3.4980251789093018}, + "receptacle": True, + "receptacleCount": 1, + "receptacleObjectIds": [], + "rotation": {"x": 270.019775390625, "y": 0.0, "z": 0.0}, + "visible": False}, + {"bounds3D": [-3.5819432735443115, + 0.09301620721817017, + -3.3748939037323, + -0.9107897281646729, + 0.6846743822097778, + -3.362663507461548], + "cameraHorizon": 0.0, + "distance": 1.723375678062439, + "isopen": False, + "name": "Cabinet", + "objectId": "Cabinet|-01.01|+00.39|-03.37", + "objectType": "Cabinet", + "openable": True, + "parentReceptacle": "", + "pickupable": False, + "pivotSimObjs": [], + "position": {"x": -1.010789155960083, + "y": 0.3888453245162964, + "z": -3.368778705596924}, + "receptacle": True, + "receptacleCount": 1, + "receptacleObjectIds": [], + "rotation": {"x": 270.019775390625, "y": 0.0, "z": 0.0}, + "visible": False}, + {"bounds3D": [-0.8397345542907715, + 0.09301596879959106, + -3.5855960845947266, + -0.3782111406326294, + 0.6846745014190674, + -3.124072551727295], + "cameraHorizon": 0.0, + "distance": 1.43833327293396, + "isopen": False, + "name": "Cabinet", + "objectId": "Cabinet|-00.63|+00.39|-03.01", + "objectType": "Cabinet", + "openable": True, + "parentReceptacle": "", + "pickupable": False, + "pivotSimObjs": [{"objectId": "Pan|-00.68|+00.08|-03.27", + "pivotId": 0}], + "position": {"x": -0.6330178380012512, + "y": 0.3888453245162964, + "z": -3.0088343620300293}, + "receptacle": True, + "receptacleCount": 1, + "receptacleObjectIds": ["Pan|-00.68|+00.08|-03.27"], + "rotation": {"x": 270.019775390625, "y": 0.0, "z": 0.0}, + "visible": False}, + {"bounds3D": [-0.5953132510185242, + 0.09301614761352539, + -2.9192330837249756, + -0.4644504189491272, + 0.6846743822097778, + -2.5138638019561768], + "cameraHorizon": 0.0, + "distance": 
1.034377932548523, + "isopen": False, + "name": "Cabinet", + "objectId": "Cabinet|-00.63|+00.39|-02.51", + "objectType": "Cabinet", + "openable": True, + "parentReceptacle": "", + "pickupable": False, + "pivotSimObjs": [{"objectId": "Pot|-00.47|+00.08|-02.74", + "pivotId": 0}], + "position": {"x": -0.6298820972442627, + "y": 0.3888453245162964, + "z": -2.5138638019561768}, + "receptacle": True, + "receptacleCount": 1, + "receptacleObjectIds": ["Pot|-00.47|+00.08|-02.74"], + "rotation": {"x": 270.019775390625, "y": 0.0, "z": 0.0}, + "visible": False}, + {"bounds3D": [-0.6035346984863281, + 0.7248871326446533, + -2.9642739295959473, + -0.6004599332809448, + 0.8706167936325073, + -2.5138635635375977], + "cameraHorizon": 0.0, + "distance": 1.1347001791000366, + "isopen": False, + "name": "Drawer", + "objectId": "Cabinet|-00.48|+00.78|-02.74", + "objectType": "Cabinet", + "openable": True, + "parentReceptacle": "", + "pickupable": False, + "pivotSimObjs": [{"objectId": "Fork|-00.48|+00.81|-02.74", + "pivotId": 0}], + "position": {"x": -0.4819878041744232, + "y": 0.777635395526886, + "z": -2.7390687465667725}, + "receptacle": True, + "receptacleCount": 1, + "receptacleObjectIds": ["Fork|-00.48|+00.81|-02.74"], + "rotation": {"x": 270.019775390625, "y": 0.0, "z": 0.0}, + "visible": False}, + {"bounds3D": [-0.6152604818344116, + 1.5292630195617676, + -3.8681092262268066, + -0.15373694896697998, + 2.2539286613464355, + -3.406585216522217], + "cameraHorizon": 0.0, + "distance": 1.907861590385437, + "isopen": False, + "name": "Cabinet", + "objectId": "Cabinet|-00.35|+01.89|-03.29", + "objectType": "Cabinet", + "openable": True, + "parentReceptacle": "", + "pickupable": False, + "pivotSimObjs": [], + "position": {"x": -0.34654390811920166, + "y": 1.8915960788726807, + "z": -3.2933475971221924}, + "receptacle": True, + "receptacleCount": 1, + "receptacleObjectIds": [], + "rotation": {"x": 270.019775390625, "y": 0.0, "z": 0.0}, + "visible": False}, + {"bounds3D": 
[-0.3028959631919861, + 1.5292634963989258, + -1.5821408033370972, + -0.17203307151794434, + 2.2539284229278564, + -1.2865678071975708], + "cameraHorizon": 0.0, + "distance": 1.2184957265853882, + "isopen": False, + "name": "Cabinet", + "objectId": "Cabinet|-00.34|+01.89|-01.29", + "objectType": "Cabinet", + "openable": True, + "parentReceptacle": "", + "pickupable": False, + "pivotSimObjs": [{"objectId": "Bowl|-00.16|+01.50|-01.45", + "pivotId": 0}], + "position": {"x": -0.33746451139450073, + "y": 1.8915960788726807, + "z": -1.2865678071975708}, + "receptacle": True, + "receptacleCount": 1, + "receptacleObjectIds": ["Bowl|-00.16|+01.50|-01.45"], + "rotation": {"x": 270.019775390625, "y": 0.0, "z": 0.0}, + "visible": False}, + {"bounds3D": [-0.33359596133232117, + 1.9445738792419434, + -2.497605323791504, + -0.20273306965827942, + 2.275726795196533, + -2.12178373336792], + "cameraHorizon": 0.0, + "distance": 1.508346438407898, + "isopen": False, + "name": "Cabinet", + "objectId": "Cabinet|-00.34|+02.11|-02.50", + "objectType": "Cabinet", + "openable": True, + "parentReceptacle": "", + "pickupable": False, + "pivotSimObjs": [], + "position": {"x": -0.33746451139450073, + "y": 2.1101503372192383, + "z": -2.497605323791504}, + "receptacle": True, + "receptacleCount": 1, + "receptacleObjectIds": [], + "rotation": {"x": 270.019775390625, "y": 0.0, "z": 0.0}, + "visible": False}, + {"bounds3D": [-0.33359596133232117, + 1.9445738792419434, + -2.0148353576660156, + -0.20273306965827942, + 2.275726795196533, + -1.631803035736084], + "cameraHorizon": 0.0, + "distance": 1.3153576850891113, + "isopen": False, + "name": "Cabinet", + "objectId": "Cabinet|-00.34|+02.11|-01.63", + "objectType": "Cabinet", + "openable": True, + "parentReceptacle": "", + "pickupable": False, + "pivotSimObjs": [], + "position": {"x": -0.33746451139450073, + "y": 2.1101503372192383, + "z": -1.6318029165267944}, + "receptacle": True, + "receptacleCount": 1, + "receptacleObjectIds": [], + "rotation": 
{"x": 270.019775390625, "y": 0.0, "z": 0.0}, + "visible": False}, + {"bounds3D": [-0.334695965051651, + 1.9445741176605225, + -1.2722522020339966, + -0.20383307337760925, + 2.275726556777954, + -0.909758448600769], + "cameraHorizon": 0.0, + "distance": 1.3944311141967773, + "isopen": False, + "name": "Cabinet", + "objectId": "Cabinet|-00.34|+02.11|-01.27", + "objectType": "Cabinet", + "openable": True, + "parentReceptacle": "", + "pickupable": False, + "pivotSimObjs": [], + "position": {"x": -0.33746451139450073, + "y": 2.1101503372192383, + "z": -1.2722522020339966}, + "receptacle": True, + "receptacleCount": 1, + "receptacleObjectIds": [], + "rotation": {"x": 270.019775390625, "y": 0.0, "z": 0.0}, + "visible": False}, + {"bounds3D": [-0.334695965051651, + 1.9445738792419434, + -0.7808091640472412, + -0.20383307337760925, + 2.275726795196533, + -0.3908956050872803], + "cameraHorizon": 0.0, + "distance": 1.8876863718032837, + "isopen": False, + "name": "Cabinet", + "objectId": "Cabinet|-00.34|+02.11|-00.39", + "objectType": "Cabinet", + "openable": True, + "parentReceptacle": "", + "pickupable": False, + "pivotSimObjs": [], + "position": {"x": -0.33746451139450073, + "y": 2.1101503372192383, + "z": -0.39089563488960266}, + "receptacle": True, + "receptacleCount": 1, + "receptacleObjectIds": [], + "rotation": {"x": 270.019775390625, "y": 0.0, "z": 0.0}, + "visible": False}, + {"bounds3D": [-0.29263991117477417, + 1.524427890777588, + -3.242128849029541, + -0.16177701950073242, + 2.2490928173065186, + -2.9145426750183105], + "cameraHorizon": 0.0, + "distance": 1.8711798191070557, + "isopen": False, + "name": "Cabinet", + "objectId": "Cabinet|-00.33|+01.89|-03.24", + "objectType": "Cabinet", + "openable": True, + "parentReceptacle": "", + "pickupable": False, + "pivotSimObjs": [], + "position": {"x": -0.3272084593772888, + "y": 1.8867603540420532, + "z": -3.24212908744812}, + "receptacle": True, + "receptacleCount": 1, + "receptacleObjectIds": [], + "rotation": {"x": 
270.019775390625, "y": 0.0, "z": 0.0}, + "visible": False}, + {"bounds3D": [-1.0901057720184326, + 0.7320617437362671, + -3.888105630874634, + -0.12189435958862305, + 0.952538251876831, + -2.9198944568634033], + "cameraHorizon": 0.0, + "distance": 1.700704574584961, + "isopen": False, + "name": "Sink", + "objectId": "Sink|-00.61|+00.94|-03.40", + "objectType": "Sink", + "openable": False, + "parentReceptacle": "", + "pickupable": False, + "pivotSimObjs": [], + "position": {"x": -0.6060000061988831, + "y": 0.9419999718666077, + "z": -3.4040000438690186}, + "receptacle": True, + "receptacleCount": 4, + "receptacleObjectIds": [], + "rotation": {"x": 0.0, "y": 44.999996185302734, "z": 0.0}, + "visible": False}, + {"bounds3D": [-0.24254396557807922, + 0.2711706757545471, + -0.8578107357025146, + -0.18492531776428223, + 0.3472771644592285, + -0.8001892566680908], + "cameraHorizon": 0.0, + "distance": 1.402801513671875, + "isopen": False, + "name": "Egg", + "objectId": "Egg|-00.21|+00.27|-00.83", + "objectType": "Egg", + "openable": False, + "parentReceptacle": "Fridge|-00.22|00.00|-00.83", + "pickupable": True, + "pivotSimObjs": [], + "position": {"x": -0.2137332558631897, + "y": 0.2719060778617859, + "z": -0.8289999961853027}, + "receptacle": False, + "receptacleCount": 0, + "receptacleObjectIds": [], + "rotation": {"x": 0.0, "y": 270.0, "z": 0.0}, + "visible": False}, + {"bounds3D": [-1.5313434600830078, + 0.9396243691444397, + -3.5390284061431885, + -1.444072961807251, + 1.0310288667678833, + -3.452800989151001], + "cameraHorizon": 0.0, + "distance": 1.813209891319275, + "isopen": False, + "name": "Apple", + "objectId": "Apple|-01.49|+00.93|-03.50", + "objectType": "Apple", + "openable": False, + "parentReceptacle": "", + "pickupable": True, + "pivotSimObjs": [], + "position": {"x": -1.4870775938034058, + "y": 0.9303702116012573, + "z": -3.495858669281006}, + "receptacle": False, + "receptacleCount": 0, + "receptacleObjectIds": [], + "rotation": {"x": 0.0, "y": 0.0, 
"z": 0.0}, + "visible": False}, + {"bounds3D": [-0.42987868189811707, + 0.7445617914199829, + -0.7644813060760498, + -0.27457037568092346, + 0.8978313207626343, + -0.614234447479248], + "cameraHorizon": 0.0, + "distance": 1.3395159244537354, + "isopen": False, + "name": "Lettuce1", + "objectId": "Lettuce|-00.33|+00.74|-00.69", + "objectType": "Lettuce", + "openable": False, + "parentReceptacle": "Fridge|-00.22|00.00|-00.83", + "pickupable": True, + "pivotSimObjs": [], + "position": {"x": -0.2137332707643509, + "y": 0.7358768582344055, + "z": -0.6933581233024597}, + "receptacle": False, + "receptacleCount": 0, + "receptacleObjectIds": [], + "rotation": {"x": 0.0, "y": 270.0, "z": 0.0}, + "visible": False}, + {"bounds3D": [-0.6563448309898376, + 0.8579825162887573, + -1.8734675645828247, + -0.576196014881134, + 0.9381582736968994, + -1.7933334112167358], + "cameraHorizon": 0.0, + "distance": 0.39948585629463196, + "isopen": False, + "name": "StoveKnob2_Range1", + "objectId": "StoveKnob|-00.62|+00.90|-01.83", + "objectType": "StoveKnob", + "openable": False, + "parentReceptacle": "", + "pickupable": False, + "pivotSimObjs": [], + "position": {"x": -0.6176999807357788, + "y": 0.8995000123977661, + "z": -1.833400011062622}, + "receptacle": False, + "receptacleCount": 0, + "receptacleObjectIds": [], + "rotation": {"x": 315.0, + "y": 89.97400665283203, + "z": 180.03199768066406}, + "visible": False}, + {"bounds3D": [-0.6007806062698364, + 0.9309259057044983, + -1.624263048171997, + -0.4915965795516968, + 1.0337982177734375, + -1.5355703830718994], + "cameraHorizon": 0.0, + "distance": 0.5002012252807617, + "isopen": False, + "name": "CoffeeCup1", + "objectId": "Mug|-00.53|+00.93|-01.58", + "objectType": "Mug", + "openable": False, + "parentReceptacle": "", + "pickupable": True, + "pivotSimObjs": [], + "position": {"x": -0.5322529077529907, + "y": 0.9301429986953735, + "z": -1.5799167156219482}, + "receptacle": False, + "receptacleCount": 0, + "receptacleObjectIds": [], + 
"rotation": {"x": 0.0, "y": 0.0, "z": 0.0}, + "visible": False}, + {"bounds3D": [-0.3178284764289856, + 0.9333651065826416, + -2.3485283851623535, + -0.1359715461730957, + 0.9572690725326538, + -2.1666717529296875], + "cameraHorizon": 0.0, + "distance": 0.9261895418167114, + "isopen": False, + "name": "GasStoveTop_Range3", + "objectId": "StoveBurner|-00.23|+00.93|-02.26", + "objectType": "StoveBurner", + "openable": False, + "parentReceptacle": "", + "pickupable": False, + "pivotSimObjs": [], + "position": {"x": -0.22689999639987946, + "y": 0.9301429986953735, + "z": -2.2576000690460205}, + "receptacle": True, + "receptacleCount": 1, + "receptacleObjectIds": [], + "rotation": {"x": 0.0, "y": 0.0, "z": 0.0}, + "visible": False}, + {"bounds3D": [-0.5608127117156982, + 0.9253336787223816, + -2.6081254482269287, + -0.2908085584640503, + 0.9346393942832947, + -2.578345537185669], + "cameraHorizon": 0.0, + "distance": 1.0233911275863647, + "isopen": False, + "name": "butterKnife", + "objectId": "ButterKnife|-00.43|+00.93|-02.60", + "objectType": "ButterKnife", + "openable": False, + "parentReceptacle": "", + "pickupable": True, + "pivotSimObjs": [], + "position": {"x": -0.4278929829597473, + "y": 0.9303703904151917, + "z": -2.5970890522003174}, + "receptacle": False, + "receptacleCount": 0, + "receptacleObjectIds": [], + "rotation": {"x": 0.0, "y": 0.0, "z": 0.0}, + "visible": False}, + {"bounds3D": [-1.4711631536483765, + 0.9296106696128845, + -3.788638114929199, + -1.1927717924118042, + 1.0843539237976074, + -3.621340751647949], + "cameraHorizon": 0.0, + "distance": 1.98361074924469, + "isopen": False, + "name": "Bread", + "objectId": "Bread|-01.33|+00.93|-03.71", + "objectType": "Bread", + "openable": False, + "parentReceptacle": "", + "pickupable": True, + "pivotSimObjs": [], + "position": {"x": -1.3320000171661377, + "y": 0.9303702712059021, + "z": -3.7049999237060547}, + "receptacle": False, + "receptacleCount": 0, + "receptacleObjectIds": [], + "rotation": {"x": 
0.0, "y": 6.309757232666016, "z": 0.0}, + "visible": False}, + {"bounds3D": [-0.6563448309898376, + 0.8581824898719788, + -2.1692676544189453, + -0.576196014881134, + 0.9383582472801208, + -2.0891332626342773], + "cameraHorizon": 0.0, + "distance": 0.5444206595420837, + "isopen": False, + "name": "StoveKnob2_Range3", + "objectId": "StoveKnob|-00.62|+00.90|-02.13", + "objectType": "StoveKnob", + "openable": False, + "parentReceptacle": "", + "pickupable": False, + "pivotSimObjs": [], + "position": {"x": -0.6176999807357788, + "y": 0.8996999859809875, + "z": -2.129199981689453}, + "receptacle": False, + "receptacleCount": 0, + "receptacleObjectIds": [], + "rotation": {"x": 315.0, + "y": 89.97400665283203, + "z": 180.03199768066406}, + "visible": False}, + {"bounds3D": [-1.6801782846450806, + 0.9300780892372131, + -3.5211691856384277, + -1.5957564115524292, + 1.001486897468567, + -3.4346466064453125], + "cameraHorizon": 0.0, + "distance": 1.8383132219314575, + "isopen": False, + "name": "Potato", + "objectId": "Potato|-01.63|+00.93|-03.48", + "objectType": "Potato", + "openable": False, + "parentReceptacle": "", + "pickupable": True, + "pivotSimObjs": [], + "position": {"x": -1.6319999694824219, + "y": 0.9303702116012573, + "z": -3.475545883178711}, + "receptacle": False, + "receptacleCount": 0, + "receptacleObjectIds": [], + "rotation": {"x": 0.0, "y": 0.0, "z": 0.0}, + "visible": False}, + {"bounds3D": [-0.3178284764289856, + 0.9333651065826416, + -1.9365284442901611, + -0.1359715461730957, + 0.9572690725326538, + -1.754671573638916], + "cameraHorizon": 0.0, + "distance": 0.7805821895599365, + "isopen": False, + "name": "GasStoveTop_Range2", + "objectId": "StoveBurner|-00.23|+00.93|-01.85", + "objectType": "StoveBurner", + "openable": False, + "parentReceptacle": "", + "pickupable": False, + "pivotSimObjs": [], + "position": {"x": -0.22689999639987946, + "y": 0.9301429986953735, + "z": -1.8456000089645386}, + "receptacle": True, + "receptacleCount": 1, + 
"receptacleObjectIds": [], + "rotation": {"x": 0.0, "y": 0.0, "z": 0.0}, + "visible": False}, + {"bounds3D": [-2.784135103225708, + 0.9281330108642578, + -3.721567153930664, + -2.5158650875091553, + 1.3016245365142822, + -3.4185357093811035], + "cameraHorizon": 0.0, + "distance": 2.460068941116333, + "isopen": False, + "name": "CoffeeMachine2", + "objectId": "CoffeeMachine|-02.65|+00.93|-03.57", + "objectType": "CoffeeMachine", + "openable": False, + "parentReceptacle": "", + "pickupable": False, + "pivotSimObjs": [], + "position": {"x": -2.6500000953674316, + "y": 0.9303701519966125, + "z": -3.5739998817443848}, + "receptacle": True, + "receptacleCount": 1, + "receptacleObjectIds": [], + "rotation": {"x": 0.0, "y": 0.0, "z": 0.0}, + "visible": False}, + {"bounds3D": [-0.6211026906967163, + 0.7797816395759583, + -1.4715903997421265, + -0.41446253657341003, + 0.7992590069770813, + -1.4300788640975952], + "cameraHorizon": 0.0, + "distance": 0.6147258281707764, + "isopen": False, + "name": "Spoon", + "objectId": "Spoon|-00.50|+00.78|-01.45", + "objectType": "Spoon", + "openable": False, + "parentReceptacle": "Cabinet|-00.50|+00.78|-01.45", + "pickupable": True, + "pivotSimObjs": [], + "position": {"x": -0.4998437762260437, + "y": 0.784561276435852, + "z": -1.450774908065796}, + "receptacle": False, + "receptacleCount": 0, + "receptacleObjectIds": [], + "rotation": {"x": -1.0245284101983998e-05, "y": 0.0, "z": 0.0}, + "visible": False}, + {"bounds3D": [-0.5118284225463867, + 0.9333651065826416, + -2.3485283851623535, + -0.3299715518951416, + 0.9572690725326538, + -2.1666717529296875], + "cameraHorizon": 0.0, + "distance": 0.7716866731643677, + "isopen": False, + "name": "GasStoveTop_Range4", + "objectId": "StoveBurner|-00.42|+00.93|-02.26", + "objectType": "StoveBurner", + "openable": False, + "parentReceptacle": "", + "pickupable": False, + "pivotSimObjs": [], + "position": {"x": -0.42089998722076416, + "y": 0.9301429986953735, + "z": -2.2576000690460205}, + 
"receptacle": True, + "receptacleCount": 1, + "receptacleObjectIds": [], + "rotation": {"x": 0.0, "y": 0.0, "z": 0.0}, + "visible": False}, + {"bounds3D": [-0.5738816261291504, + 0.0948454737663269, + -2.837768316268921, + -0.37388163805007935, + 0.2948455214500427, + -2.637768030166626], + "cameraHorizon": 0.0, + "distance": 1.4331694841384888, + "isopen": False, + "name": "Pot1", + "objectId": "Pot|-00.47|+00.08|-02.74", + "objectType": "Pot", + "openable": False, + "parentReceptacle": "Cabinet|-00.63|+00.39|-02.51", + "pickupable": False, + "pivotSimObjs": [], + "position": {"x": -0.4738820791244507, + "y": 0.08484548330307007, + "z": -2.737863779067993}, + "receptacle": True, + "receptacleCount": 1, + "receptacleObjectIds": [], + "rotation": {"x": -1.0245284101983998e-05, "y": 0.0, "z": 0.0}, + "visible": False}, + {"bounds3D": [-2.613636016845703, + 0.0006269514560699463, + -3.853076219558716, + -2.085458755493164, + 0.874946117401123, + -3.286182165145874], + "cameraHorizon": 0.0, + "distance": 2.494718313217163, + "isopen": False, + "name": "Chair5", + "objectId": "Chair|-02.35|00.00|-03.60", + "objectType": "Chair", + "openable": False, + "parentReceptacle": "", + "pickupable": False, + "pivotSimObjs": [], + "position": {"x": -2.3540000915527344, + "y": -5.653919288306497e-07, + "z": -3.6019999980926514}, + "receptacle": False, + "receptacleCount": 0, + "receptacleObjectIds": [], + "rotation": {"x": 0.0, "y": 74.23304748535156, "z": 0.0}, + "visible": False}, + {"bounds3D": [-0.3505246043205261, + 1.5073667764663696, + -2.2319486141204834, + 0.009090721607208252, + 1.8599165678024292, + -1.720513105392456], + "cameraHorizon": 0.0, + "distance": 1.0151386260986328, + "isopen": False, + "name": "Microwave4", + "objectId": "Microwave|-00.17|+01.49|-02.06", + "objectType": "Microwave", + "openable": True, + "parentReceptacle": "", + "pickupable": False, + "pivotSimObjs": [], + "position": {"x": -0.1746000051498413, + "y": 1.485553503036499, + "z": 
-2.055999994277954}, + "receptacle": True, + "receptacleCount": 1, + "receptacleObjectIds": [], + "rotation": {"x": 0.0, "y": 0.0, "z": 0.0}, + "visible": False}], + "sceneName": "FloorPlan28", + "screenHeight": 300, + "screenWidth": 300 +} diff --git a/gym_ai2thor/task_utils.py b/gym_ai2thor/task_utils.py new file mode 100644 index 0000000..7a2c68c --- /dev/null +++ b/gym_ai2thor/task_utils.py @@ -0,0 +1,179 @@ +import math +import random + +import numpy as np +import torch +import matplotlib as mpl +mpl.use('TkAgg') # or whatever other backend that you want +from matplotlib import pyplot as plt +import matplotlib.patches as patches + + +############################## +#------- Visualisation ------# +############################## + +matplotlib_colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k', 'w'] + +def show_bounding_boxes(event, key_terms=None, lines_between_boxes_and_crosshair=False): + fig2 = plt.figure() + ax2 = fig2.add_subplot(111) + + show_all = True if not key_terms else False + + check_if = lambda x, name: True if x.lower() in name.lower() else False + + plt.imshow(event.frame, interpolation='nearest') + for key, arr in event.instance_detections2D.items(): + if show_all or any([check_if(term, key) for term in key_terms]): + x1, y1, x2, y2 = list(arr) + bbox_color = matplotlib_colors[random.randint(0, len(matplotlib_colors) - 1)] + print(key) + print(x1, y1, x2, y2, ' with color: {}'.format(bbox_color)) + + rect = patches.Rectangle((x1, y1), abs(x2 - x1), abs(y2 - y1), linewidth=1, + edgecolor=bbox_color, + facecolor='none') + ax2.add_patch(rect) + if lines_between_boxes_and_crosshair: + crosshair_x, crosshair_y = 150, 150 + bbox_x_cent, bbox_y_cent = (x2 + x1) / 2, (y2 + y1) / 2 + x_coords, y_coords = [crosshair_x, bbox_x_cent], [crosshair_y, bbox_y_cent] + plt.plot(x_coords, y_coords, marker='o', markersize=3, color=bbox_color) + dist = math.sqrt((crosshair_x - bbox_x_cent) ** 2 + + (crosshair_y - bbox_y_cent) ** 2) + print('2D distance of bbox centre to 
crosshair: {}'.format(round(dist, 3))) + plt.show() + +def show_instance_segmentation(event, key_terms): + fig2 = plt.figure() + ax2 = fig2.add_subplot(111) + + show_all = True if not key_terms else False + + # check if objectType is within objectId + check_if = lambda x, name: True if x.lower() in name.lower() else False + + event_frame_with_segmentation = event.frame.copy() + for key, mask_arr in event.instance_masks.items(): + if show_all or any([check_if(term, key) for term in key_terms]): + nonzero_indices = mask_arr.nonzero() + + event_frame_with_segmentation[nonzero_indices[0], nonzero_indices[1], :] = \ + np.array([255, 255, 255]) # only white for now + + plt.imshow(event_frame_with_segmentation, interpolation='nearest') + plt.show() + +############################## +#------- Preprocessing ------# +############################## + +def get_word_to_idx(train_instructions): + word_to_idx = {} + for instruction_data in train_instructions: + instruction = instruction_data + for word in instruction.split(" "): + if word not in word_to_idx: + word_to_idx[word] = len(word_to_idx) + return word_to_idx + +def turn_instruction_str_to_tensor(instruction, env): + instruction_indices = [] + for word in instruction.split(" "): + instruction_indices.append(env.task.word_to_idx[word]) + instruction_indices = np.array(instruction_indices) + instruction_indices = torch.from_numpy(instruction_indices).view(1, -1) + return instruction_indices + +def unpack_state(state, env): + """ + Returns (tensor image, None) if no natural language instructions and + (tensor image, instruction embedding indices) if the task involves natural language. + More specifically converts from numpy to tensor image and from string to embedding indices array + and returns unpacked state into a tuple. 
+ """ + instruction_indices = None + if not env.task.has_language_instructions: + image_state = state + else: + # natural language instruction is within state so unpack tuple + (image_state, instruction) = state + + instruction_indices = turn_instruction_str_to_tensor(instruction, env) + image_state = torch.from_numpy(image_state) + + return image_state, instruction_indices + +############################## +#----- Reward Functions -----# +############################## + +def check_if_focus_and_close_enough_to_object_type(event, object_type='Mug', + distance_threshold_3d=1.0, + distance_threshold_2d=90, verbose=False): + all_objects_for_object_type = [obj for obj in event.metadata['objects'] + if obj['objectType'] == object_type] + + if not all_objects_for_object_type: + full_list_of_objects_types = [obj['objectType'] for obj in event.metadata['objects']] + raise ValueError('Object type: "{}", which was chosen as the target from the last word ' + 'of the instruction, is not available in ai2thor at all or just isn\'t in this scene.'
+ ' The full list of object types in this scene: {}'.format(object_type, + ', '.join(full_list_of_objects_types))) + + bool_list = [] + for idx, obj in enumerate(all_objects_for_object_type): + bounds = event.instance_detections2D.get(obj['objectId']) + if bounds is None: + continue + + x1, y1, x2, y2 = bounds + euc_distance_to_obj = calculate_euc_distance_between_agent_and_object( + event.metadata['agent'], obj) + bool_list.append(check_if_focus_and_close_enough(x1, y1, x2, y2, euc_distance_to_obj, + distance_threshold_2d, + distance_threshold_3d, verbose)) + + return sum(bool_list) + +def calculate_euc_distance_between_agent_and_object(event_metadata_agent, obj, two_dimensions=True): + a_x, a_y, a_z = event_metadata_agent['position']['x'], \ + event_metadata_agent['position']['y'], \ + event_metadata_agent['position']['z'] + obj_x, obj_y, obj_z = obj['position']['x'], obj['position']['y'], obj['position']['z'] + if two_dimensions: + euclidean_distance_to_obj = math.sqrt((obj_x - a_x) ** 2 + (obj_z - a_z) ** 2) + else: + euclidean_distance_to_obj = math.sqrt((obj_x - a_x) ** 2 + (obj_y - a_y) ** 2 + + (obj_z - a_z) ** 2) + return euclidean_distance_to_obj + +def check_if_focus_and_close_enough(x1, y1, x2, y2, distance_3d, distance_threshold_2d, + distance_threshold_3d, verbose=False): + focus_bool = is_bounding_box_centre_close_to_crosshair(x1, y1, x2, y2, verbose=verbose, + distance_threshold_2d=distance_threshold_2d) + close_bool = euclidean_close_enough_3d(distance_3d, distance_threshold_3d, verbose=verbose) + + if verbose: + print('Object within 2D distance of crosshair: {}. Object close enough with 3D distance: {}'.format(focus_bool, close_bool)) + + return True if focus_bool and close_bool else False + +def is_bounding_box_centre_close_to_crosshair(x1, y1, x2, y2, distance_threshold_2d, verbose=False): + """ + object's bounding box has to be mostly within the 100x100 middle of the image. 
+ Could also use distance within obj type but decided to do this instead + """ + bbox_x_cent, bbox_y_cent = (x2 + x1) / 2, (y2 + y1) / 2 + dist = math.sqrt((150 - bbox_x_cent) ** 2 + (150 - bbox_y_cent) ** 2) + if verbose: + print('Euclidean 2D distance to crosshair: {}. distance_threshold_2d: {}'.format(dist, + distance_threshold_2d)) + return True if dist < distance_threshold_2d else False + +def euclidean_close_enough_3d(distance, distance_threshold_3d, verbose=False): + if verbose: + print('Euclidean 3D distance: {}. distance_threshold_3d: {}'.format(distance, + distance_threshold_3d)) + return True if distance < distance_threshold_3d else False diff --git a/gym_ai2thor/tasks.py b/gym_ai2thor/tasks.py index cda936a..e77037e 100644 --- a/gym_ai2thor/tasks.py +++ b/gym_ai2thor/tasks.py @@ -2,8 +2,10 @@ Different task implementations that can be defined inside an ai2thor environment """ from collections import Counter +import random from gym_ai2thor.utils import InvalidTaskParams +from gym_ai2thor.task_utils import get_word_to_idx, check_if_focus_and_close_enough_to_object_type class TaskFactory: @@ -17,13 +19,22 @@ def create_task(config): :param config: parsed config file :return: Task instance initialized """ + # todo maybe remove taskfactory task_name = config['task']['task_name'] - if task_name == 'PickUp': + if task_name == 'PickUpTask': if config['task']['target_object'] in config['pickup_objects']: - return PickupTask(**config['task']) + return PickupTask(**config) else: raise InvalidTaskParams('Error initializing PickUpTask. 
{} is not ' 'pickupable!'.format(config['task']['target_object'])) + elif task_name == 'NaturalLanguageLookAtObjectTask': + return NaturalLanguageLookAtObjectTask(**config) + elif task_name == 'NaturalLanguageNavigateToObjectTask': + return NaturalLanguageNavigateToObjectTask(**config) + elif task_name == 'NaturalLanguagePickUpObjectTask': + return NaturalLanguagePickUpObjectTask(**config) + elif task_name == 'NaturalLanguagePickUpMultipleObjectTask': + return NaturalLanguagePickUpMultipleObjectTask(**config) else: raise NotImplementedError('{} is not yet implemented!'.format(task_name)) @@ -32,24 +43,29 @@ class BaseTask: """ Base class and factory for tasks to be defined for a specific environment """ - def __init__(self, config): - self.task_config = config - self.max_episode_length = config['max_episode_length'] \ - if 'max_episode_length' in config else 1000 - self.movement_reward = config['movement_reward'] if 'movement_reward' in config else 0 + def __init__(self, **kwargs): + self.config = kwargs + self.has_language_instructions = False + self.max_episode_length = self.config['max_episode_length'] \ + if 'max_episode_length' in self.config else 1000 + self.movement_reward = self.config.get('movement_reward', 0) self.step_num = 0 - - self.reset() + self.max_object_pickup_crosshair_dist = float('inf') + self.max_object_pickup_euclidean_dist = None + self.random_scene_ids_on_reset = self.config['task'].get('random_scene_ids_on_reset') def transition_reward(self, state): """ Returns the reward given the corresponding information (state, dictionary with objects collected, distance to goal, etc.) depending on the task. 
-        :return: (args, kwargs) First elemnt represents the reward obtained at the step
+        :return: (args, kwargs) First element represents the reward obtained at the step
                  Second element represents if episode finished at this step
         """
         raise NotImplementedError
 
+    def get_extra_state(self):
+        return None
+
     def reset(self):
         """
@@ -61,21 +77,25 @@ def reset(self):
 
 class PickupTask(BaseTask):
     """
-    This task consists on picking up an target object. Rewards are only collected if the right
+    This task consists of picking up a target object. Rewards are only collected if the right
     object was added to the inventory with the action PickUp (See gym_ai2thor.envs.ai2thor_env
     for details).
     """
     def __init__(self, target_objects=('Mug',), goal=None, **kwargs):
+        super().__init__(**kwargs)
         self.target_objects = target_objects
         self.goal = Counter(goal if goal else {obj: float('inf') for obj in self.target_objects})
         self.pickedup_objects = Counter()
        self.object_rewards = Counter(self.target_objects)  # all target objects give reward 1
         self.prev_inventory = []
-        super().__init__(kwargs)
+        self.max_object_pickup_crosshair_dist = kwargs['task'].get(
+            'max_object_pickup_crosshair_dist', float('inf'))
 
-    def transition_reward(self, state):
+        self.reset()
+
+    def transition_reward(self, event):
         reward, done = self.movement_reward, False
-        curr_inventory = state.metadata['inventoryObjects']
+        curr_inventory = event.metadata['inventoryObjects']
         object_picked_up = not self.prev_inventory and curr_inventory and \
                            curr_inventory[0]['objectType'] in self.target_objects
@@ -93,10 +113,235 @@ def transition_reward(self, state):
             print('Reached goal at step {}'.format(self.step_num))
             done = True
 
-        self.prev_inventory = state.metadata['inventoryObjects']
+        self.prev_inventory = event.metadata['inventoryObjects']
         return reward, done
 
     def reset(self):
         self.pickedup_objects = Counter()
         self.prev_inventory = []
         self.step_num = 0
+
+
+class NaturalLanguageBaseTask(BaseTask):
+    """
+    Natural Language base task
 for storing train_instructions, word_to_idx, curr_instruction and
+    target object. get_extra_state() returns the sentence instruction as part of the state tuple.
+    """
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self.has_language_instructions = True  # overrides the flag set in BaseTask.__init__
+        # natural language instructions state settings
+        # todo make sure object boxes is turned on in env. Need rainbow branch
+        self.train_instructions = ('Bowl', 'Mug') if not kwargs['task'].get('list_of_instructions')\
+            else kwargs['task']['list_of_instructions']
+        self.word_to_idx = get_word_to_idx(self.train_instructions)
+
+        # get current instruction and object type
+        self.curr_instruction_idx = random.randint(0, len(self.train_instructions) - 1)
+        self.curr_instruction = self.train_instructions[self.curr_instruction_idx]
+        # always last word of the sentence. Needs to be spelled exactly for it to work
+        self.curr_object_type = self.curr_instruction.split(' ')[-1]
+        print('Current instruction: {}. object type (last word in sentence): {}'.format(
+            self.curr_instruction, self.curr_object_type))
+
+        self.default_reward = kwargs['task'].get('default_reward', 1)
+
+    def get_extra_state(self):
+        return self.curr_instruction
+
+    def reset(self):
+        self.curr_instruction_idx = random.randint(0, len(self.train_instructions) - 1)
+        self.curr_instruction = self.train_instructions[self.curr_instruction_idx]
+
+        # always last word of the sentence. Has to be spelled exactly
+        self.curr_object_type = self.curr_instruction.split(' ')[-1]
+        print('Current instruction: {}.
 object type (last word in sentence): {}'.format(
+            self.curr_instruction, self.curr_object_type))
+
+        return self.curr_instruction
+
+
+class NaturalLanguageLookAtObjectTask(NaturalLanguageBaseTask):
+    """
+    This task consists of requiring the agent to get close to the object type and look at it
+    """
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        # how far the target object is from the agent to receive reward
+        self.distance_threshold_3d = kwargs['task'].get('distance_threshold_3d', 1.0)
+        # self.terminal_if_lookat_wrong_object  # todo hard to refactor and not worth it? but cozmo had it this way. how to define what the wrong object is? Impossible? So rare too.
+
+    def transition_reward(self, event):
+        reward, done = self.movement_reward, False
+        # check if current target object is in middle of screen and close
+        target_objs = check_if_focus_and_close_enough_to_object_type(event, self.curr_object_type,
+            distance_threshold_3d=self.distance_threshold_3d)
+        if target_objs > 0:
+            print('Stared at {} and is close enough. Num objects in view and '
+                  'close: {}'.format(self.curr_object_type, target_objs))
+            reward += self.default_reward
+            done = True
+
+        return reward, done
+
+    def reset(self):
+        return super(NaturalLanguageLookAtObjectTask, self).reset()
+
+
+class NaturalLanguageNavigateToObjectTask(NaturalLanguageBaseTask):
+    """
+    This task consists of requiring the agent to get close to the object type and look at it.
+    The closeness is set by distance_threshold_3d=0.84
+    """
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+
+    def transition_reward(self, event):
+        reward, done = self.movement_reward, False
+        # check if current target object is in middle of screen and close.
+        # Closer than NaturalLanguageLookAtObjectTask
+        target_objs = check_if_focus_and_close_enough_to_object_type(event,
+                                                                     self.curr_object_type,
+                                                                     distance_threshold_3d=0.84)
+        if target_objs > 0:
+            print('Stared at {} and is close enough.
Num objects in view and ' + 'close: {}'.format(self.curr_object_type, target_objs)) + reward += self.default_reward + done = True + + return reward, done + + def reset(self): + return super(NaturalLanguageNavigateToObjectTask, self).reset() + + +class NaturalLanguagePickUpObjectTask(NaturalLanguageBaseTask): + """ + This task consists of requiring the agent to pickup the object that is specified in the current + instruction. Rewards are only collected if the right object was added to the inventory. + """ + + def __init__(self, **kwargs): + super().__init__(**kwargs) + # make sure pickup objects is turned on and target objects are in pick up objects + if not kwargs.get('pickup_put_interaction'):# todo DRY + raise ValueError('Need to turn on pickup_put_interaction in config') + if not kwargs.get('pickup_objects'): + raise ValueError('Need to specify pickup_objects in config') + else: + for instruction in self.train_instructions: + # always last word of the sentence. Has to be spelled exactly + object_type = instruction.split(' ')[-1] + if object_type not in kwargs['pickup_objects']: + raise ValueError('Target object {} is not in ' + 'config[\'pickup_objects\']'.format(object_type)) + + self.max_object_pickup_crosshair_dist = kwargs['task'].get( + 'max_object_pickup_crosshair_dist', float('inf')) + self.max_object_pickup_euclidean_dist = kwargs['task'].get( + 'max_object_pickup_euclidean_dist', None) + + self.prev_inventory = [] + + def transition_reward(self, event): + reward, done = self.movement_reward, False + curr_inventory = event.metadata['inventoryObjects'] + # nothing previously in inventory and now there is something within inventory + object_picked_up = not self.prev_inventory and curr_inventory + + if object_picked_up: + # Add reward from the specific object + if curr_inventory[0]['objectType'] == self.curr_object_type: + print('Picked up correct object') + reward += self.default_reward + else: + print('Picked up wrong object') + reward -= 
self.default_reward + done = True + print('{} reward collected for picking up object: {} at step: {}!'.format(reward, + curr_inventory[0]['objectType'], + self.step_num)) + + if self.max_episode_length and self.step_num >= self.max_episode_length: + print('Reached maximum episode length: {}'.format(self.step_num)) + done = True + + self.prev_inventory = event.metadata['inventoryObjects'] + return reward, done + + def reset(self): + return super(NaturalLanguagePickUpObjectTask, self).reset() + + +class NaturalLanguagePickUpMultipleObjectTask(NaturalLanguageBaseTask): + """ + This task consists of requiring the agent to pickup many different objects that are specified in + the current instruction. The difference is that the episode doesn't terminate on pickup. + Need a specific build file to disable only 1 item in inventory. + Rewards are only collected if the right object was added to the inventory. + """ + + def __init__(self, **kwargs): + super().__init__(**kwargs) + # make sure pickup objects is turned on and target objects are in pick up objects + if not kwargs.get('pickup_put_interaction'): + raise ValueError('Need to turn on pickup_put_interaction in config') + if not kwargs.get('pickup_objects'): + raise ValueError('Need to specify pickup_objects in config') + else: + for instruction in self.train_instructions: + # always last word of the sentence. 
 Has to be spelled exactly
+                object_type = instruction.split(' ')[-1]
+                if object_type not in kwargs['pickup_objects']:
+                    raise ValueError('Target object {} is not in '
+                                     'config[\'pickup_objects\']'.format(object_type))
+
+        self.max_object_pickup_crosshair_dist = kwargs['task'].get(
+            'max_object_pickup_crosshair_dist', float('inf'))
+        self.max_object_pickup_euclidean_dist = kwargs['task'].get(
+            'max_object_pickup_euclidean_dist', None)
+
+        self.prev_inventory = []
+
+    def transition_reward(self, event):
+        reward, done = self.movement_reward, False
+        curr_inventory = event.metadata['inventoryObjects']
+
+        # the inventory grew, i.e. a new object was just picked up (episode continues)
+        object_picked_up = curr_inventory and len(self.prev_inventory) != len(curr_inventory)
+
+        if object_picked_up:
+            # Add reward from the specific object
+            if curr_inventory[-1]['objectType'] == self.curr_object_type:
+                reward += self.default_reward
+            else:
+                print('Picked up wrong object')
+                reward -= self.default_reward
+            print('{} reward collected for picking up object: {} at step: {}!'.format(reward,
+                  curr_inventory[-1]['objectType'],
+                  self.step_num))
+            print('Inventory: {}'.format(curr_inventory))
+
+        if self.max_episode_length and self.step_num >= self.max_episode_length:
+            print('Reached maximum episode length: {}'.format(self.step_num))
+            done = True
+            print('Collected {} objects'.format(len(curr_inventory)))
+
+        self.prev_inventory = event.metadata['inventoryObjects']
+        return reward, done
+
+    def reset(self):
+        self.prev_inventory = []
+        return super(NaturalLanguagePickUpMultipleObjectTask, self).reset()
+
+"""
+1. PickUp spam putdown cup (default)
+2. PickUp multiple cups
+3. Put a cup in sink
+4. Put 3 cups in sink
+5.
Put cup in microwave +- pick up cups with segmentation on 50 rooms (so it generalises to the white blob) +""" diff --git a/gym_ai2thor/utils.py b/gym_ai2thor/utils.py index 24ff8f5..22538d8 100644 --- a/gym_ai2thor/utils.py +++ b/gym_ai2thor/utils.py @@ -51,16 +51,18 @@ def read_config(config_path, config_dict=None): if key == 'task': for task_key in config_dict[key]: if task_key in config[key]: - warnings.warn('Key: [\'{}\'][\'{}\'] already in config file with value {}. ' - 'Overwriting with value: {}'.format(key, task_key, + if config[key][task_key] != config_dict[key][task_key]: + warnings.warn('Key: [\'{}\'][\'{}\'] already in config file with value ' + '{}. Overwriting with value: {}'.format(key, task_key, config[key][task_key], config_dict[key][task_key])) - config[key][task_key] = config_dict[key][task_key] + config[key][task_key] = config_dict[key][task_key] # else just a regular check elif key in config: - warnings.warn('Key: {} already in config file with value {}. ' - 'Overwriting with value: {}'.format(key, config[key], + if config[key] != config_dict[key]: + warnings.warn('Key: {} already in config file with value {}. ' + 'Overwriting with value: {}'.format(key, config[key], config_dict[key])) - config[key] = config_dict[key] + config[key] = config_dict[key] return config @@ -69,3 +71,4 @@ class InvalidTaskParams(Exception): Raised when the user inputs the wrong parameters for creating a task. 
""" pass + diff --git a/requirements.txt b/requirements.txt index 3186c1d..8616252 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,4 @@ ai2thor>=0.0.34 -scikit-image>=0.14.1 \ No newline at end of file +scikit-image>=0.14.1 +pytorch>=0.4.1 + diff --git a/setup.py b/setup.py index 69db71c..e68fa8c 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,5 @@ from setuptools import setup -setup(name='gym_foo', +setup(name='gym_ai2thor', version='0.0.1', - install_requires=['gym', 'ai2thor', 'numpy']) + install_requires=['gym', 'ai2thor', 'numpy', 'scikit-image']) diff --git a/tests/test_ai2thor_wrapper.py b/tests/test_ai2thor_wrapper.py index bee6dec..80721d0 100644 --- a/tests/test_ai2thor_wrapper.py +++ b/tests/test_ai2thor_wrapper.py @@ -1,13 +1,10 @@ """ Tests related to the ai2thor environment wrapper. """ -import random -import threading import time import unittest import warnings -import ai2thor.controller from gym_ai2thor.envs.ai2thor_env import AI2ThorEnv @@ -46,6 +43,7 @@ def test_environments_runs(self): time.time() - start, sum(all_step_times) / len(all_step_times))) self.assertTrue(len(all_step_times) == num_steps) + env.close() def test_cup_task_and_interaction_actions(self): """ @@ -61,6 +59,7 @@ def test_cup_task_and_interaction_actions(self): 'MoveRight', 'OpenObject', 'PutObject', 'PickupObject', 'CloseObject'] env = AI2ThorEnv(config_dict={'scene_id': 'FloorPlan28', + 'gridSize': 0.25, 'acceptable_receptacles': [ 'Microwave' # the used receptacle below ]}) @@ -76,6 +75,120 @@ def test_cup_task_and_interaction_actions(self): break self.assertTrue(sum(rewards) == 2) + env.close() + + def test_variations_of_natural_language_tasks(self): + """ + test_natural_language_task look at task and other exceptions that should be raised + """ + + # mug actions were copied from before so is longer but episode should end before picking up + actions_to_look_at_mug = ['RotateRight', 'RotateRight', 'MoveAhead', 'MoveAhead', + 'RotateRight', 'MoveAhead', 
'MoveAhead', 'RotateLeft', + 'MoveAhead', + 'MoveAhead', + 'MoveAhead', 'RotateLeft', 'LookDown', 'PickupObject', + 'PutObject', + 'LookUp', + 'MoveRight', 'OpenObject', 'PutObject', 'PickupObject', + 'CloseObject'] + + actions_to_look_at_apple = ['RotateRight', 'RotateRight', 'MoveAhead', 'MoveAhead', + 'RotateRight', 'MoveAhead', 'MoveAhead', 'MoveAhead', + 'RotateLeft', 'MoveAhead', 'MoveAhead', 'MoveAhead', + 'MoveAhead', 'MoveAhead', 'MoveAhead', 'LookDown', + 'MoveAhead', 'PickupObject'] + + actions_to_look_at_tomato = actions_to_look_at_apple[:] + ['RotateLeft', 'MoveAhead', + 'RotateRight', 'MoveAhead', + 'PickupObject'] + + # bread is behind apple + actions_to_look_at_bread = actions_to_look_at_tomato[:] + + with self.assertRaises(ValueError): + # reset needs to always be called before step + env = AI2ThorEnv() + env.step(0) + env.close() + + with self.assertRaises(ValueError): + # 'Cup' object type doesn't exist so ValueError is raised + config_dict = {'num_random_actions_at_init': 3, + 'lookupdown_actions': True, + 'open_close_interaction': True, + 'pickup_put_interaction': True, + 'gridSize': 0.01, + 'task': { + 'task_name': 'NaturalLanguageLookAtObjectTask', + 'list_of_instructions': ['Cup'] + }} + env = AI2ThorEnv(config_dict=config_dict) + env.reset() + env.step(0) + env.close() + + config_dicts = [ + {'lookupdown_actions': True, + 'open_close_interaction': True, + 'pickup_put_interaction': True, + 'gridSize': 0.25, + 'task': { + 'task_name': 'NaturalLanguageLookAtObjectTask', + 'list_of_instructions': ['Apple', 'Mug', 'Tomato', 'Bread', 'Chair'] + }}, + {'lookupdown_actions': True, + 'open_close_interaction': True, + 'pickup_put_interaction': True, + 'pickup_objects': ['Apple', 'Mug', 'Chair'], + 'gridSize': 0.25, + 'task': { + 'task_name': 'NaturalLanguagePickUpObjectTask', + 'list_of_instructions': ['Apple', 'Mug', 'Chair'] + }}] + for config_dict in config_dicts: + env = AI2ThorEnv(config_dict=config_dict) + env.seed(42) + for episode in 
range(12): + state = env.reset() + rewards = [] + if state[1] == 'Mug': current_set_of_actions = actions_to_look_at_mug + elif state[1] == 'Apple': current_set_of_actions = actions_to_look_at_apple + elif state[1] == 'Tomato': current_set_of_actions = actions_to_look_at_tomato + elif state[1] == 'Bread': current_set_of_actions = actions_to_look_at_bread + else: current_set_of_actions = actions_to_look_at_mug # no reward for Chair + + for idx, action_str in enumerate(current_set_of_actions): + action = env.action_names.index(action_str) + state, reward, terminal, _ = env.step(action) + self.assertTrue(len(state) == 2) + + if reward > 0: + print('Looked at: {} and got reward: {}. Episode over'.format(state[1], + reward)) + rewards.append(reward) + if terminal: + break + + sum_of_rewards = sum(rewards) + print('Sum of rewards: {}'.format(sum_of_rewards)) + if state[1] == 'Mug': + self.assertTrue(sum_of_rewards == 1) + elif state[1] == 'Apple': + self.assertTrue(sum_of_rewards == 1) + elif state[1] == 'Bread': + self.assertTrue(sum_of_rewards == 1) + elif state[1] == 'Tomato': + self.assertTrue(sum_of_rewards == 1) + else: + if config_dict['task']['task_name'] == 'NaturalLanguagePickUpObjectTask': + # picked up wrong object + self.assertTrue(sum_of_rewards == -1) + else: + # looked at wrong object so reward is 0 + self.assertTrue(sum_of_rewards == 0) + env.close() + def test_config_override(self): """ Check if reading both a config file and a config dict at the same time works and that the @@ -83,163 +196,13 @@ def test_config_override(self): changed from overwriting """ with warnings.catch_warnings(record=True) as warning_objs: - env = AI2ThorEnv(config_file='config_files/config_example.json', - config_dict={'scene_id': 'FloorPlan27'}) + env = AI2ThorEnv(config_dict={'scene_id': 'FloorPlan27'}) # checking if correct warning appears (there could be multiple depending on user) self.assertTrue([w for w in warning_objs if 'Key: scene_id already in config file' in 
w.message.args[0]]) self.assertTrue(env.scene_id == 'FloorPlan27') - - @staticmethod - def test_simple_example(): - """ - Taken from here: http://ai2thor.allenai.org/tutorials/examples - """ - controller = ai2thor.controller.Controller() - controller.start() - - # Kitchens: FloorPlan1 - FloorPlan30 - # Living rooms: FloorPlan201 - FloorPlan230 - # Bedrooms: FloorPlan301 - FloorPlan330 - # Bathrooms: FloorPLan401 - FloorPlan430 - - controller.reset('FloorPlan28') - controller.step(dict(action='Initialize', gridSize=0.25)) - - event = controller.step(dict(action='MoveAhead')) - - # Numpy Array - shape (width, height, channels), channels are in RGB order - event.frame - - # Numpy Array in BGR order suitable for use with OpenCV - event.cv2img - - # current metadata dictionary that includes the state of the scene - event.metadata - - @staticmethod - def test_calling_complex_actions(): - """ - Examples of how to interact with environment internals e.g. picking up, placing and - opening objects. 
- Taken from here: http://ai2thor.allenai.org/tutorials/examples - """ - controller = ai2thor.controller.Controller() - controller.start() - - controller.reset('FloorPlan28') - controller.step(dict(action='Initialize', gridSize=0.25)) - - controller.step(dict(action='Teleport', x=-1.25, y=1.00, z=-1.5)) - controller.step(dict(action='LookDown')) - event = controller.step(dict(action='Rotate', rotation=90)) - # In FloorPlan28, the agent should now be looking at a mug - for obj in event.metadata['objects']: - if obj['visible'] and obj['pickupable'] and obj['objectType'] == 'Mug': - event = controller.step(dict(action='PickupObject', objectId=obj['objectId']), - raise_for_failure=True) - mug_object_id = obj['objectId'] - break - - # the agent now has the Mug in its inventory - # to put it into the Microwave, we need to open the microwave first - - event = controller.step(dict(action='LookUp')) - for obj in event.metadata['objects']: - if obj['visible'] and obj['openable'] and obj['objectType'] == 'Microwave': - event = controller.step(dict(action='OpenObject', objectId=obj['objectId']), - raise_for_failure=True) - receptacle_object_id = obj['objectId'] - break - - event = controller.step(dict(action='MoveRight'), raise_for_failure=True) - event = controller.step(dict(action='PutObject', - receptacleObjectId=receptacle_object_id, - objectId=mug_object_id), - raise_for_failure=True) - - # close the microwave - event = controller.step(dict( - action='CloseObject', - objectId=receptacle_object_id), raise_for_failure=True) - - @staticmethod - def test_multithreaded(): - """ - Stress test and also shows how multi-threading can be used to greatly speed up processing, - specially to support the rendering of class, object and depth images. - Adapted from here: http://ai2thor.allenai.org/tutorials/examples - - Extra analysis done on adding unity information. Important for training models to know. 
- ~67 FPS with 1 thread no extra info - ~61 FPS with 1 thread added class info - ~18 FPS with 1 thread added Object info on top - ~17 FPS with 1 thread added Depth info on top - - ~70 FPS with 2 threads and no depth, class and object image - ~15 FPS with 2 threads and all three of those - - Good examples of how to multi-thread are below - """ - thread_count = 1 - - def run(thread_num): - """ - Runs 5 iterations of 10 steps of the environment with the different rendering options - :param thread_num: (int) Number of threads to launch - """ - env = ai2thor.controller.Controller() - env.start() - - render_depth_image, render_class_image, render_object_image = False, False, False - - # 50 is an arbritary number - for i in range(5): - t_start = time.time() - env.reset('FloorPlan1') - # env.step({'action': 'Initialize', 'gridSize': 0.25}) - - # Compare the performance with all the extra added information - # Big take away is that Object instance information makes it much slower - if i == 2: - render_class_image = True - print('Thread num: {}. Added Class info'.format(thread_num)) - elif i == 3: - render_object_image = True - print('Thread num: {}. Added Object info'.format(thread_num)) - elif i == 4: - render_depth_image = True - print('Thread num: {}. Added Depth info'.format(thread_num)) - - env.step(dict(action='Initialize', - gridSize=0.25, - renderDepthImage=render_depth_image, - renderClassImage=render_class_image, - renderObjectImage=render_object_image)) - print('Thread num: {}. init time: {}'.format(thread_num, time.time() - t_start)) - t_start_total = time.time() - for _ in range(10): - env.step({'action': 'MoveAhead'}) - env.step({'action': 'RotateRight'}) - total_time = time.time() - t_start_total - print('Thread num: {}. Total time for 10 steps: {}. {:.2f} fps'. 
- format(thread_num, total_time, 50 / total_time)) - - threads = [threading.Thread(target=run, args=(thread_num, )) - for thread_num in range(thread_count)] - for thread in threads: - thread.daemon = True - thread.start() - time.sleep(1) - - for thread in threads: - # calling join() in a loop/timeout to allow for Python 2.7 - # to be interrupted with SIGINT - while thread.isAlive(): - thread.join(1) - - print('done') + env.close() if __name__ == '__main__':
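
The focus-and-proximity reward check added in `gym_ai2thor/task_utils.py` above boils down to two thresholded distances: a 2D pixel distance from the bounding-box centre to the crosshair, and a Euclidean distance from the agent to the object. Below is a minimal standalone sketch of that logic; the function names, the 300x300 frame with the crosshair at (150, 150), and the default thresholds are illustrative assumptions, not the module's actual API:

```python
import math

# Assumed frame geometry: a 300x300 image with the crosshair at its centre.
CROSSHAIR = (150.0, 150.0)


def crosshair_distance_2d(x1, y1, x2, y2):
    """2D pixel distance from the bounding-box centre to the crosshair."""
    bbox_x_cent, bbox_y_cent = (x1 + x2) / 2.0, (y1 + y2) / 2.0
    return math.hypot(CROSSHAIR[0] - bbox_x_cent, CROSSHAIR[1] - bbox_y_cent)


def agent_object_distance(agent_pos, obj_pos, two_dimensions=True):
    """Euclidean distance between agent and object; the 2D variant ignores height (y)."""
    dx = obj_pos['x'] - agent_pos['x']
    dz = obj_pos['z'] - agent_pos['z']
    if two_dimensions:
        return math.sqrt(dx ** 2 + dz ** 2)
    dy = obj_pos['y'] - agent_pos['y']
    return math.sqrt(dx ** 2 + dy ** 2 + dz ** 2)


def focused_and_close(x1, y1, x2, y2, dist_3d, thresh_2d=60.0, thresh_3d=1.0):
    """Reward condition: the object is both near the crosshair and near the agent."""
    return crosshair_distance_2d(x1, y1, x2, y2) < thresh_2d and dist_3d < thresh_3d
```

With these assumed thresholds, a bounding box centred on the crosshair at 0.5 units away passes, while the same box at 2.0 units away, or a box in the top-left corner, fails, which is the behaviour the NaturalLanguageLookAtObjectTask relies on for its terminal reward.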