Source code for cntk.contrib.deeprl.agent.shared.models

# Copyright (c) Microsoft. All rights reserved.

# Licensed under the MIT license. See LICENSE.md file in the project root
# for full license information.
# ==============================================================================
"""A set of predefined models used by Q learning or Actor-Critic."""

import ast

import cntk as C
import numpy as np


class Models:
    """A set of predefined models to approximate Q or log of pi (policy).

    The loss function needs to be 'cross_entropy_with_softmax' for policy
    gradient methods.
    """

    @staticmethod
    def feedforward_network(shape_of_inputs,
                            number_of_outputs,
                            model_hidden_layers,
                            loss_function=None,
                            use_placeholder_for_input=False):
        """Feedforward network to approximate Q or log of pi.

        Args:
            shape_of_inputs: tuple of array (input) dimensions.
            number_of_outputs: dimension of output, equals the number of
                possible actions.
            model_hidden_layers: string representing a list of integers
                corresponding to the number of nodes in each hidden layer.
            loss_function: if not specified, use squared loss by default.
            use_placeholder_for_input: if true, inputs have to be replaced
                later with actual input_variable.

        Returns: a Python dictionary with string-valued keys including
            'inputs', 'outputs', 'loss' and 'f'.
        """
        # input/output
        inputs = C.ops.placeholder(shape=shape_of_inputs) \
            if use_placeholder_for_input \
            else C.ops.input_variable(shape=shape_of_inputs, dtype=np.float32)
        outputs = C.ops.input_variable(
            shape=(number_of_outputs,), dtype=np.float32)

        # network structure
        hidden_layers = ast.literal_eval(model_hidden_layers)
        f = C.layers.Sequential([
            C.layers.For(
                range(len(hidden_layers)),
                lambda h: C.layers.Dense(hidden_layers[h],
                                         activation=C.ops.relu)),
            C.layers.Dense(number_of_outputs, activation=None)
        ])(inputs)

        if loss_function is None:
            loss = C.losses.squared_error(f, outputs)
        else:
            loss = loss_function(f, outputs)

        return {
            'inputs': inputs,
            'outputs': outputs,
            'f': f,
            'loss': loss
        }
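
    # Illustrative usage (a sketch, not part of the original module): the
    # values below, a 4-dimensional state space, 2 actions and two hidden
    # layers of 16 units, are assumptions chosen only for demonstration.
    #
    #   net = Models.feedforward_network(
    #       shape_of_inputs=(4,),
    #       number_of_outputs=2,
    #       model_hidden_layers='[16, 16]')
    #
    # net['f'] is the CNTK function approximating Q (or log pi); net['loss']
    # is the squared-error criterion against targets fed via net['outputs'].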
    @staticmethod
    def dueling_network(shape_of_inputs,
                        number_of_outputs,
                        model_hidden_layers,
                        loss_function=None,
                        use_placeholder_for_input=False):
        """Dueling network to approximate Q function.

        See paper at https://arxiv.org/pdf/1511.06581.pdf.

        Args:
            shape_of_inputs: tuple of array (input) dimensions.
            number_of_outputs: dimension of output, equals the number of
                possible actions.
            model_hidden_layers: in the form of "[comma-separated integers,
                [comma-separated integers], [comma-separated integers]]".
                Each integer is the number of nodes in a hidden layer. The
                first set of integers represents the shared component of the
                dueling network. The second set corresponds to the state
                value function V and the third set corresponds to the
                advantage function A.
            loss_function: if not specified, use squared loss by default.
            use_placeholder_for_input: if true, inputs have to be replaced
                later with actual input_variable.

        Returns: a Python dictionary with string-valued keys including
            'inputs', 'outputs', 'loss' and 'f'.
        """
        # input/output
        inputs = C.ops.placeholder(shape=shape_of_inputs) \
            if use_placeholder_for_input \
            else C.ops.input_variable(shape=shape_of_inputs, dtype=np.float32)
        outputs = C.ops.input_variable(
            shape=(number_of_outputs,), dtype=np.float32)

        # network structure
        shared_hidden_layers, v_hidden_layers, a_hidden_layers = \
            Models._parse_dueling_network_structure(model_hidden_layers)

        # shared layers
        s = C.layers.For(
            range(len(shared_hidden_layers)),
            lambda h: C.layers.Dense(shared_hidden_layers[h],
                                     activation=C.ops.relu))(inputs)

        # Value function
        v = C.layers.Sequential([
            C.layers.For(
                range(len(v_hidden_layers)),
                lambda h: C.layers.Dense(v_hidden_layers[h],
                                         activation=C.ops.relu)),
            C.layers.Dense(1, activation=None)
        ])(s)

        # Advantage function
        a = C.layers.Sequential([
            C.layers.For(
                range(len(a_hidden_layers)),
                lambda h: C.layers.Dense(a_hidden_layers[h],
                                         activation=C.ops.relu)),
            C.layers.Dense(number_of_outputs, activation=None)
        ])(s)

        # Q = V + A - avg(A)
        avg_a = C.layers.AveragePooling((number_of_outputs,))(a)
        q = v + a - avg_a

        if loss_function is None:
            loss = C.losses.squared_error(q, outputs)
        else:
            loss = loss_function(q, outputs)

        return {
            'inputs': inputs,
            'outputs': outputs,
            'f': q,
            'loss': loss
        }
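
    # The aggregation above implements
    #     Q(s, a) = V(s) + A(s, a) - mean_a' A(s, a'),
    # which forces the advantages to average to zero per state so that V and
    # A are identifiable (Wang et al., https://arxiv.org/pdf/1511.06581.pdf).
    # A valid model_hidden_layers string for this method would look like
    # '[10, [8], [8]]': a shared layer of 10 units, then one 8-unit layer
    # each for the V and A streams (sizes here are illustrative assumptions).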
    @staticmethod
    def _parse_dueling_network_structure(hidden_layers_str):
        # Split "[shared..., [v...], [a...]]" into three lists of layer sizes.
        hidden_layers = ast.literal_eval(hidden_layers_str)

        if not (
                len(hidden_layers) > 2
                and isinstance(hidden_layers[-1], list)
                and isinstance(hidden_layers[-2], list)):
            raise ValueError('Invalid dueling network structure.')

        return \
            Models._remove_none_elements_from_list(hidden_layers[:-2]), \
            Models._remove_none_elements_from_list(hidden_layers[-2]), \
            Models._remove_none_elements_from_list(hidden_layers[-1])

    @staticmethod
    def _remove_none_elements_from_list(value_list):
        return [e for e in value_list if e is not None]
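

if __name__ == '__main__':
    # Minimal smoke test, not part of the original module. The sizes below
    # (a 4-dimensional state space, 3 actions, illustrative hidden layers)
    # are assumptions chosen purely for demonstration.
    net = Models.dueling_network(
        shape_of_inputs=(4,),
        number_of_outputs=3,
        model_hidden_layers='[10, [8], [8]]')
    state = np.float32(np.random.rand(1, 4))
    # Evaluate Q(s, .) for a single random state; the result has shape (1, 3).
    print(net['f'].eval({net['inputs']: state}))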