-
Notifications
You must be signed in to change notification settings - Fork 752
Expand file tree
/
Copy pathactor_network.py
More file actions
138 lines (118 loc) · 5.2 KB
/
actor_network.py
File metadata and controls
138 lines (118 loc) · 5.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# coding=utf-8
# Copyright 2020 The TF-Agents Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Sample Actor network to use with DDPG agents.
Note: This network scales actions to fit the given spec by using `tanh`. Due to
the nature of the `tanh` function, actions near the spec bounds cannot be
returned.
"""
import gin
import tensorflow as tf
from tf_agents.networks import network
from tf_agents.networks import utils
from tf_agents.specs import tensor_spec
from tf_agents.utils import common
@gin.configurable
class ActorNetwork(network.Network):
  """Actor network mapping a single observation to a single bounded action.

  The network is the layer stack returned by `utils.mlp_layers` (built from the
  optional convolution, fully connected and dropout parameters) followed by a
  `tanh`-activated dense layer with one unit per action element. The output of
  that last layer is passed through `common.scale_to_spec` together with the
  action spec before being returned.
  """

  def __init__(
      self,
      input_tensor_spec,
      output_tensor_spec,
      fc_layer_params=None,
      dropout_layer_params=None,
      conv_layer_params=None,
      activation_fn=tf.keras.activations.relu,
      kernel_initializer=None,
      last_kernel_initializer=None,
      name='ActorNetwork',
  ):
    """Creates an instance of `ActorNetwork`.

    Args:
      input_tensor_spec: A nest of `tensor_spec.TensorSpec` representing the
        inputs. Must contain exactly one spec.
      output_tensor_spec: A nest of `tensor_spec.BoundedTensorSpec` representing
        the outputs. Must contain exactly one spec with a float dtype.
      fc_layer_params: Optional list of fully_connected parameters, where each
        item is the number of units in the layer.
      dropout_layer_params: Optional list of dropout layer parameters, each item
        is the fraction of input units to drop or a dictionary of parameters
        according to the keras.Dropout documentation. The additional parameter
        `permanent`, if set to True, allows to apply dropout at inference for
        approximated Bayesian inference. The dropout layers are interleaved with
        the fully connected layers; there is a dropout layer after each fully
        connected layer, except if the entry in the list is None. This list must
        have the same length of fc_layer_params, or be None.
      conv_layer_params: Optional list of convolution layers parameters, where
        each item is a length-three tuple indicating (filters, kernel_size,
        stride).
      activation_fn: Activation function, e.g. tf.nn.relu, slim.leaky_relu, ...
      kernel_initializer: kernel initializer for all layers except for the
        final action layer. If None, a VarianceScaling initializer will be
        used.
      last_kernel_initializer: kernel initializer for the final action layer.
        If None, a RandomUniform initializer will be used.
      name: A string representing name of the network.

    Raises:
      ValueError: If `input_tensor_spec` or `output_tensor_spec` does not
        contain exactly one item, or if the action data type is not
        `tf.float32` or `tf.float64`.
    """
    super(ActorNetwork, self).__init__(
        input_tensor_spec=input_tensor_spec, state_spec=(), name=name
    )

    output_tensor_spec = tensor_spec.from_spec(output_tensor_spec)

    # Require exactly one leaf. `call` only reads the first flattened
    # observation, so an empty nest must be rejected here rather than
    # surfacing later as an IndexError.
    if len(tf.nest.flatten(input_tensor_spec)) != 1:
      raise ValueError('Only a single observation is supported by this network')

    flat_action_spec = tf.nest.flatten(output_tensor_spec)
    # Same reasoning as above: an empty action nest would otherwise fail on the
    # `[0]` lookup below with an IndexError instead of the documented
    # ValueError.
    if len(flat_action_spec) != 1:
      raise ValueError('Only a single action is supported by this network')
    self._single_action_spec = flat_action_spec[0]

    if self._single_action_spec.dtype not in [tf.float32, tf.float64]:
      raise ValueError('Only float actions are supported by this network.')

    if kernel_initializer is None:
      kernel_initializer = tf.compat.v1.keras.initializers.VarianceScaling(
          scale=1.0 / 3.0, mode='fan_in', distribution='uniform'
      )
    if last_kernel_initializer is None:
      last_kernel_initializer = tf.keras.initializers.RandomUniform(
          minval=-0.003, maxval=0.003
      )

    # TODO(kbanoop): Replace mlp_layers with encoding networks.
    self._mlp_layers = utils.mlp_layers(
        conv_layer_params,
        fc_layer_params,
        dropout_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=kernel_initializer,
        name='input_mlp',
    )

    # Final layer: one unit per action element, squashed by `tanh` so that
    # `call` can hand the result to `common.scale_to_spec`.
    self._mlp_layers.append(
        tf.keras.layers.Dense(
            self._single_action_spec.shape.num_elements(),
            activation=tf.keras.activations.tanh,
            kernel_initializer=last_kernel_initializer,
            name='action',
        )
    )

    self._output_tensor_spec = output_tensor_spec

  def call(self, observations, step_type=(), network_state=(), training=False):
    """Runs the network on `observations` and returns the scaled actions.

    Args:
      observations: A nest holding the observation; only the first flattened
        element is used, and it is cast to `tf.float32`.
      step_type: Unused.
      network_state: Returned unchanged as the second element of the result.
      training: Forwarded as the `training` argument to every layer.

    Returns:
      A tuple `(output_actions, network_state)`, where `output_actions` is the
      result of `common.scale_to_spec` packed into the structure of the output
      tensor spec given at construction.
    """
    del step_type  # unused.
    observations = tf.nest.flatten(observations)
    output = tf.cast(observations[0], tf.float32)
    for layer in self._mlp_layers:
      output = layer(output, training=training)

    actions = common.scale_to_spec(output, self._single_action_spec)
    output_actions = tf.nest.pack_sequence_as(
        self._output_tensor_spec, [actions]
    )

    return output_actions, network_state