diff --git a/README.md b/README.md index 4125c53..102063c 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,10 @@ # [Learning to Learn](https://arxiv.org/abs/1606.04474) in TensorFlow -Compatible with TensorFlow 1.0 + +## Dependencies + +* [TensorFlow >=1.0](https://www.tensorflow.org/) +* [Sonnet >=1.0](https://github.com/deepmind/sonnet) ## Training diff --git a/meta.py b/meta.py index 9de24a1..c4f2880 100644 --- a/meta.py +++ b/meta.py @@ -23,13 +23,13 @@ import os import mock +import sonnet as snt import tensorflow as tf from tensorflow.python.framework import ops from tensorflow.python.util import nest import networks -import nn def _nested_assign(ref, value): @@ -379,7 +379,7 @@ def time_step(t, fx_array, x, state): # Log internal variables. for k, net in nets.items(): print("Optimizer '{}' variables".format(k)) - print([op.name for op in nn.get_variables_in_module(net)]) + print([op.name for op in snt.get_variables_in_module(net)]) return MetaLoss(loss, update, reset, fx_final, x_final) diff --git a/meta_test.py b/meta_test.py index 0eb0c52..08fcee9 100644 --- a/meta_test.py +++ b/meta_test.py @@ -24,10 +24,10 @@ from nose_parameterized import parameterized import numpy as np from six.moves import xrange +import sonnet as snt import tensorflow as tf import meta -import nn import problems @@ -141,10 +141,10 @@ def testConvolutional(self): """Tests L2L applied to problem with convolutions.""" kernel_shape = 4 def convolutional_problem(): - conv = nn.Conv2D(output_channels=1, - kernel_shape=kernel_shape, - stride=1, - name="conv") + conv = snt.Conv2D(output_channels=1, + kernel_shape=kernel_shape, + stride=1, + name="conv") output = conv(tf.random_normal((100, 100, 3, 10))) return tf.reduce_sum(output) diff --git a/networks.py b/networks.py index 86301be..b588703 100644 --- a/networks.py +++ b/networks.py @@ -25,9 +25,9 @@ import dill as pickle import numpy as np import six +import sonnet as snt import tensorflow as tf -import nn import preprocess @@ -47,7 +47,7 @@ def factory(net, net_options=(), net_path=None): def save(network, sess, filename=None): """Save the variables contained by a network to disk.""" to_save = collections.defaultdict(dict) - variables = nn.get_variables_in_module(network) + variables = snt.get_variables_in_module(network) for v in variables: split = v.name.split(":")[0].split("/") @@ -63,7 +63,7 @@ def save(network, sess, filename=None): @six.add_metaclass(abc.ABCMeta) -class Network(nn.RNNCore): +class Network(snt.RNNCore): """Base class for meta-optimizer networks.""" @abc.abstractmethod @@ -166,8 +166,8 @@ def __init__(self, output_size, layers, preprocess_name="identity", tf modules). Default is `tf.identity`. preprocess_options: Gradient preprocessing options. scale: Gradient scaling (default is 1.0). - initializer: Variable initializer for linear layer. See `nn.Linear` and - `nn.LSTM` docs for more info. This parameter can be a string (e.g. + initializer: Variable initializer for linear layer. See `snt.Linear` and + `snt.LSTM` docs for more info. This parameter can be a string (e.g. "zeros" will be converted to tf.zeros_initializer). name: Module name. 
""" @@ -188,12 +188,12 @@ def __init__(self, output_size, layers, preprocess_name="identity", name = "lstm_{}".format(i) init = _get_layer_initializers(initializer, name, ("w_gates", "b_gates")) - self._cores.append(nn.LSTM(size, name=name, initializers=init)) - self._rnn = nn.DeepRNN(self._cores, skip_connections=False, - name="deep_rnn") + self._cores.append(snt.LSTM(size, name=name, initializers=init)) + self._rnn = snt.DeepRNN(self._cores, skip_connections=False, + name="deep_rnn") init = _get_layer_initializers(initializer, "linear", ("w", "b")) - self._linear = nn.Linear(output_size, name="linear", initializers=init) + self._linear = snt.Linear(output_size, name="linear", initializers=init) def _build(self, inputs, prev_state): """Connects the `StandardDeepLSTM` module into the graph. diff --git a/networks_test.py b/networks_test.py index 062dac9..145c0ce 100644 --- a/networks_test.py +++ b/networks_test.py @@ -20,10 +20,10 @@ from nose_parameterized import parameterized import numpy as np +import sonnet as snt import tensorflow as tf import networks -import nn class CoordinateWiseDeepLSTMTest(tf.test.TestCase): @@ -45,7 +45,7 @@ def testTrainable(self): state = net.initial_state_for_inputs(gradients) net(gradients, state) # Weights and biases for two layers. - variables = nn.get_variables_in_module(net) + variables = snt.get_variables_in_module(net) self.assertEqual(len(variables), 4) @parameterized.expand([ @@ -90,7 +90,7 @@ def testTrainable(self): state = net.initial_state_for_inputs(gradients) net(gradients, state) # Weights and biases for two layers. - variables = nn.get_variables_in_module(net) + variables = snt.get_variables_in_module(net) self.assertEqual(len(variables), 4) @parameterized.expand([ @@ -134,7 +134,7 @@ def testNonTrainable(self): net = networks.Sgd() state = net.initial_state_for_inputs(gradients) net(gradients, state) - variables = nn.get_variables_in_module(net) + variables = snt.get_variables_in_module(net) self.assertEqual(len(variables), 0) def testResults(self): @@ -169,7 +169,7 @@ def testNonTrainable(self): net = networks.Adam() state = net.initial_state_for_inputs(gradients) net(gradients, state) - variables = nn.get_variables_in_module(net) + variables = snt.get_variables_in_module(net) self.assertEqual(len(variables), 0) def testZeroLearningRate(self): diff --git a/nn/__init__.py b/nn/__init__.py deleted file mode 100644 index 7035fbe..0000000 --- a/nn/__init__.py +++ /dev/null @@ -1,44 +0,0 @@ -# Copyright 2016 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""This python module contains Neural Network Modules for TensorFlow.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from nn.base import AbstractModule -from nn.base import Error -from nn.base import IncompatibleShapeError -from nn.base import Module -from nn.base import NotConnectedError -from nn.base import NotSupportedError -from nn.base import ParentNotBuiltError -from nn.base import Transposable -from nn.base import UnderspecifiedError -from nn.basic import BatchFlatten -from nn.basic import BatchReshape -from nn.basic import Linear -from nn.basic_rnn import DeepRNN -from nn.batch_norm import BatchNorm -from nn.conv import Conv2D -from nn.conv import Conv2DTranspose -from nn.conv import SAME -from nn.conv import VALID -from nn.convnet import ConvNet2D -from nn.gated_rnn import LSTM -from nn.mlp import MLP -from nn.rnn_core import RNNCore -from nn.rnn_core import TrainableInitialState -from nn.sequential import Sequential -from nn.util import get_variables_in_module diff --git a/nn/base.py b/nn/base.py deleted file mode 100644 index 19ac0c2..0000000 --- a/nn/base.py +++ /dev/null @@ -1,322 +0,0 @@ -# Copyright 2016 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Base class for TensorFlow nn. - -This file contains the Abstract Base Class for defining Modules in TensorFlow. -A Module is an object which can be connected into the Graph multiple times -using the __call__ method, sharing variables automatically with no need to -explicitly use scopes or specify reuse=True. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import abc -import six -from six import string_types -from six.moves import xrange -import tensorflow as tf - - -class Error(Exception): - """Base class for all errors from nn. - - This is thrown to indicate a Neural Network specific problem, e.g. wrong - module arity, module is not connected to the graph when it should be, - tried to wire together incompatible modules, etc. - """ - - -class NotConnectedError(Error): - """Error raised when operating on a module that has not yet been connected. - - Some module properties / methods are valid to access before the module has - been connected into the graph, but some are not. This Error is raised when - the user attempts to do anything not valid before connection. - """ - - -class ParentNotBuiltError(Error): - """Error raised when the parent of a module has not been built yet. - - For example, when making a transpose of modules which inherit from - `module.Transposable`, the parent has to be connected to the graph before the - child transpose to ensure that shape inference has already occurred. 
- """ - - -class IncompatibleShapeError(Error): - """Error raised when the shape of the input at build time is incompatible.""" - - -class UnderspecifiedError(Error): - """Error raised when too little information is available. - - This does not typically mean the user is trying to do something that doesn't - work (in which case `IncompatibleShapeError` should be used), just that - some more information needs to be provided in order to build the Graph. - """ - - -class NotSupportedError(Error): - """Error raised when something that cannot be supported is requested. - - For example a Dilated Convolution module cannot be transposed. - """ - - -@six.add_metaclass(abc.ABCMeta) -class AbstractModule(object): - """Superclass for nn Modules. - - This class defines the functionality that every module should implement, - principally the `build` method which is wrapped using `tf.make_template` - and called from `__call__`. Every time the module is called it will - be connected into the graph but using the same shared set of variables, thanks - to the template. - - For this to work correctly, the `build` implementation in the derived class - must access all variables using `tf.get_variable`, not `tf.Variable`. The same - set of variables must be created each time, if this is not the case an Error - will be raised. - - Every subclass must call this class' `__init__` at the start of their - `__init__`, passing the relevant name. If this step is omitted variable - sharing will not work. - """ - - # Name of TensorFlow collection containing ops to update every step, such as - # moving average update ops. - UPDATE_OPS_COLLECTION = tf.GraphKeys.UPDATE_OPS - - def __init__(self, name): - """Performs the initialisation necessary for all AbstractModule instances. - - Every subclass of AbstractModule must begin their constructor with a call to - this constructor, i.e. `super(MySubModule, self).__init__(name=name)`. - - Avoid instantiating sub-modules in __init__ where possible, as they will not - be defined under the module's scope. Instead, instantiate sub-modules in - `build`. - - Args: - name: Name of this module. Used to construct the Templated build function. - - Raises: - ValueError: If name is not specified. - """ - if not isinstance(name, string_types): - raise ValueError("Name must be a string.") - self._is_connected = False - self._template = tf.make_template(name, self._build, - create_scope_now_=True) - - # Update __call__ and the object docstrings to enable better introspection - self.__doc__ = self._build.__doc__ - self.__call__.__func__.__doc__ = self._build.__doc__ - - @abc.abstractmethod - def _build(self, *args, **kwargs): - """Add elements to the Graph, computing output Tensors from input Tensors. - - Subclasses must implement this method, which will be wrapped in a Template. - - Args: - *args: Input Tensors. - **kwargs: Additional Python flags controlling connection. - """ - pass - - def __call__(self, *args, **kwargs): - out = self._template(*args, **kwargs) - # Connect the module only if self._template returns with no errors. - self._is_connected = True - return out - - @property - def variable_scope(self): - """Returns the variable_scope declared by the module. - - It is valid for library users to access the internal templated - variable_scope, but only makes sense to do so after connection. Therefore - we raise an error here if the variable_scope is requested before connection. 
- - The only case where it does make sense to access the variable_scope before - connection is to get the post-uniquification name, which we support using - the separate .name property. - - Returns: - variable_scope: `tf.VariableScope` instance of the internal `tf.Template`. - - Raises: - NotConnectedError: If the module is not connected to the Graph. - """ - self._ensure_is_connected() - return self._template.variable_scope - - @property - def name(self): - """Returns the name of the Module.""" - return self._template.variable_scope.name - - @property - def is_connected(self): - """Returns true iff the Module been connected to the Graph at least once.""" - return self._is_connected - - @classmethod - def get_possible_initializer_keys(cls): - """Returns the keys the dictionary of variable initializers may contain. - - This provides the user with a way of knowing the initializer keys that are - available without having to instantiate a nn module. Subclasses may - override this class method if they need additional arguments to determine - what initializer keys may be provided. - - Returns: - Set with strings corresponding to the strings that may be passed to the - constructor. - """ - return getattr(cls, "POSSIBLE_INITIALIZER_KEYS", set()) - - def _ensure_is_connected(self): - """Raise an Error if the module has not been connected yet. - - Until the module is connected into the Graph, any variables created do - not exist yet and cannot be created in advance due to not knowing the size - of the input Tensor(s). This assertion ensures that any variables contained - in this module must now exist. - - Raises: - NotConnectedError: If the module is not connected to the Graph. - """ - if not self.is_connected: - raise NotConnectedError( - "Variables in {} not instantiated yet, __call__ the module " - "first.".format(self.name)) - - -@six.add_metaclass(abc.ABCMeta) -class Transposable(object): - """Transposable module interface. - - The Transposable interface requires that transposable modules implement - a method called `transpose`, returning a module which is the transposed - version of the one the method is called on. - Calling the method twice should return a module with the same specifications - as the original module. - - When implementing a transposable module, special care is required to make - sure that parameters needed to instantiate the module are provided as - functions whose invocation is deferred to graph construction time. - - For example, in Linear we might want to call: - - ```python - linear = nn.Linear(name="linear", output_size=output_size) - linear_transpose = linear.transpose() - ``` - - where the output_size for linear_transpose is not known yet, as linear is - not yet connected to the graph: output_size is passed to linear_transpose's - constructor as a lambda returning linear.input_size. The lambda will return - the correct value once linear is given an input. - Notice that linear_transpose's output_size value does not need to be defined - until the module is connected to the graph. - """ - - @abc.abstractmethod - def transpose(self, name=None, **kwargs): - """Builds and returns transposed version of module. - - Args: - name: Name of the transposed module. - **kwargs: Additional Python flags controlling transposition. - - Returns: - Transposed version of the module. 
- """ - pass - - @abc.abstractmethod - def input_shape(self): - """Returns shape of input `Tensor` passed at last call to `build`.""" - pass - - -class Module(AbstractModule): - """Module wrapping a function provided by the user.""" - - def __init__(self, build, name="module"): - """Constructs a module with a given build function. - - The Module class can be used to wrap a function assembling a network into a - module. - - For example, the following code implements a simple one-hidden-layer MLP - model by defining a function called make_model and using a Module instance - to wrap it. - - ```python - def make_model(inputs): - lin1 = nn.Linear(name="lin1", output_size=10)(inputs) - relu1 = tf.nn.relu(lin1, name="relu1") - lin2 = nn.Linear(name="lin2", output_size=20)(relu1) - return lin2 - - model = nn.Module(name='simple_mlp', build=make_model) - outputs = model(inputs) - ``` - - The `partial` package from `functools` can be used to bake configuration - parameters into the function at construction time, as shown in the following - example. - - ```python - from functools import partial - - def make_model(inputs, output_sizes): - lin1 = nn.Linear(name="lin1", output_size=output_sizes[0])(inputs) - relu1 = tf.nn.relu(lin1, name="relu1") - lin2 = nn.Linear(name="lin2", output_size=output_sizes[1])(relu1) - return lin2 - - model = nn.Module(name='simple_mlp', - build=partial(make_model, output_size=[10, 20]) - outputs = model(inputs) - ``` - - Args: - build: Callable to be invoked when connecting the module to the graph. - The `build` function is invoked when the module is called, and its - role is to specify how to add elements to the Graph, and how to - compute output Tensors from input Tensors. - The `build` function signature can include the following parameters: - *args - Input Tensors. - **kwargs - Additional Python parameters controlling connection. - name: Module name. - - Raises: - TypeError: If build is not callable. - """ - super(Module, self).__init__(name) - - if not callable(build): - raise TypeError("Input 'build' must be callable.") - self._build = build - - def _build(self, *args, **kwargs): - """Forwards call to the passed-in build function.""" - return self._build(*args, **kwargs) diff --git a/nn/basic.py b/nn/basic.py deleted file mode 100644 index a1c448e..0000000 --- a/nn/basic.py +++ /dev/null @@ -1,328 +0,0 @@ -# Copyright 2016 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Basic Modules for TensorFlow nn. - -Modules defining the simplest building blocks for Neural Networks. 
-""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -import numbers - - -import numpy as np -import tensorflow as tf - -from nn import base -from nn import util - - -def create_linear_initializer(input_size): - """Returns a default initializer for weights or bias of a linear module.""" - stddev = 1 / math.sqrt(input_size) - return tf.truncated_normal_initializer(stddev=stddev) - - -class Linear(base.AbstractModule, base.Transposable): - """Linear module, optionally including bias.""" - - def __init__(self, - output_size, - use_bias=True, - initializers=None, - partitioners=None, - name="linear"): - """Constructs a Linear module. - - Args: - output_size: Output dimensionality. `output_size` can be either an integer - or a callable. In the latter case, since the function invocation is - deferred to graph construction time, the user must only ensure that - output_size can be called, returning an integer, when build is called. - use_bias: Whether to include bias parameters. Default `True`. - initializers: Optional dict containing initializers to initialize the - weights (with key 'w') or biases (with key 'b'). The default - initializers are truncated normal initializers, which are commonly - used when the inputs are zero centered (see - https://arxiv.org/pdf/1502.03167v3.pdf). - partitioners: Optional dict containing partitioners to partition - weights (with key 'w') or biases (with key 'b'). As a default, no - partitioners are used. - name: Name of the module. - - Raises: - KeyError: If an initializer is provided for a key other than 'w' or 'b' if - `use_bias` is `True`.. - TypeError: If a provided initializer is not a callable function. - """ - super(Linear, self).__init__(name=name) - self._output_size = output_size - self._use_bias = use_bias - self._input_shape = None - self._w = None - self._b = None - self.possible_keys = self.get_possible_initializer_keys(use_bias=use_bias) - self._initializers = util.check_initializers( - initializers, self.possible_keys) - self._partitioners = util.check_partitioners( - partitioners, self.possible_keys) - - @classmethod - def get_possible_initializer_keys(cls, use_bias=True): - return {"w", "b"} if use_bias else {"w"} - - def _build(self, inputs): - """Connects the Linear module into the graph, with input Tensor `inputs`. - - If this is not the first time the module has been connected to the graph, - the Tensor provided here must have the same final dimension, in order for - the existing variables to be the correct size for the multiplication. The - batch size may differ for each connection. - - Args: - inputs: A 2D Tensor of size [batch_size, input_size]. - - Returns: - A 2D Tensor of size [batch_size, output_size]. - - Raises: - base.IncompatibleShapeError: If the input is not a 2-D `Tensor` with - the size of the second dimension specified. - base.IncompatibleShapeError: If reconnecting an already connected module - into the graph, and the shape of the input is not compatible with - previous inputs. 
- """ - input_shape = tuple(inputs.get_shape().as_list()) - - if len(input_shape) != 2: - raise base.IncompatibleShapeError( - "{}: rank of shape must be 2 not: {}".format( - self.name, len(input_shape))) - - if input_shape[1] is None: - raise base.IncompatibleShapeError( - "{}: Input size must be specified at module build time".format( - self.name)) - - if self._input_shape is not None and input_shape[1] != self._input_shape[1]: - raise base.IncompatibleShapeError( - "{}: Input shape must be [batch_size, {}] not: [batch_size, {}]" - .format(self.name, self._input_shape[1], input_shape[1])) - - self._input_shape = input_shape - - if "w" not in self._initializers: - self._initializers["w"] = create_linear_initializer(self._input_shape[1]) - - if "b" not in self._initializers and self._use_bias: - self._initializers["b"] = create_linear_initializer(self._input_shape[1]) - - weight_shape = (self._input_shape[1], self.output_size) - dtype = inputs.dtype - self._w = tf.get_variable("w", - shape=weight_shape, - dtype=dtype, - initializer=self._initializers["w"], - partitioner=self._partitioners.get("w", None)) - outputs = tf.matmul(inputs, self._w) - - if self._use_bias: - bias_shape = (self.output_size,) - self._b = tf.get_variable("b", - shape=bias_shape, - dtype=dtype, - initializer=self._initializers["b"], - partitioner=self._partitioners.get("b", None)) - outputs += self._b - - return outputs - - @property - def w(self): - """Returns the Variable containing the weight matrix. - - Returns: - Variable object containing the weights, from the most recent __call__. - - Raises: - base.NotConnectedError: If the module has not been connected to the - graph yet, meaning the variables do not exist. - """ - self._ensure_is_connected() - return self._w - - @property - def b(self): - """Returns the Variable containing the bias. - - Returns: - Variable object containing the bias, from the most recent __call__. - - Raises: - base.NotConnectedError: If the module has not been connected to the - graph yet, meaning the variables do not exist. - AttributeError: If the module does not use bias. - """ - self._ensure_is_connected() - if not self._use_bias: - raise AttributeError( - "No bias Variable in Linear Module when `use_bias=False`.") - return self._b - - @property - def output_size(self): - """Returns the module output size.""" - if callable(self._output_size): - self._output_size = self._output_size() - return self._output_size - - @property - def has_bias(self): - """Returns `True` if bias Variable is present in the module.""" - return self._use_bias - - # Implements Transposable interface. - @property - def input_shape(self): - """Returns shape of input `Tensor` passed at last call to `_build`.""" - self._ensure_is_connected() - return self._input_shape - - # Implements Transposable interface - def transpose(self, name=None): - """Returns transposed `Linear` module. - - Args: - name: Optional string assigning name of transpose module. The default name - is constructed by appending "_transpose" to `self.name`. - - Returns: - Transposed `Linear` module. - """ - if name is None: - name = self.name + "_transpose" - return Linear(output_size=lambda: self.input_shape[1], - use_bias=self._use_bias, - initializers=self._initializers, - name=name) - - -class BatchReshape(base.AbstractModule, base.Transposable): - """Reshapes input Tensor, preserving the batch dimension.""" - - def __init__(self, shape, name="batch_reshape"): - """Constructs a BatchReshape module. 
- - Args: - shape: Shape to reshape the input Tensor to while preserving its - batch size; `shape` can be either a tuple/list, or a callable that - returns the actual shape. The callable does not need to be ready to - return something meaningful at construction time, but it will be - required to be able to do so when the module is connected to the - graph. When the special value -1 appears in `shape` the corresponding - size is automatically inferred. Note that -1 can only appear once in - `shape`. To flatten all non-batch dimensions, the nn.BatchFlatten - module can also be used. - name: Name of the module. - """ - super(BatchReshape, self).__init__(name=name) - - self._input_shape = None - self._shape = shape - - if not callable(self._shape): - self._shape = tuple(self._shape) - - def _infer_shape(self, dimensions): - """Replaces the -1 wildcard in the output shape vector. - - This function infers the correct output shape given the input dimensions. - - Args: - dimensions: List of input non-batch dimensions. - - Returns: - Tuple of non-batch output dimensions. - """ - # Size of input - n = np.prod(dimensions) - # Size of output where defined - m = np.prod(abs(np.array(self._shape))) - # Replace wildcard - v = np.array(self._shape) - v[v == -1] = n // m - return tuple(v) - - def _build(self, inputs): - """Connects the module into the graph, with input Tensor `inputs`. - - Args: - inputs: A Tensor of shape [batch_size] + input_shape. - - Returns: - A Tensor of shape [batch_size] + output_shape, with output_shape as - defined in constructor. - - Raises: - ValueError: If output shape is incompatible with input shape; or if - shape array contains non numeric entries; or if shape array contains - more than 1 wildcard -1. - """ - if callable(self._shape): - self._shape = tuple(self._shape()) - - if not all([isinstance(x, numbers.Integral) and (x > 0 or x == -1) - for x in self._shape]): - raise ValueError("Input array shape can contain positive integral " - "numbers only, and the wildcard -1 used once") - - if self._shape.count(-1) > 1: - raise ValueError("Wildcard -1 can appear only once in shape") - - self._input_shape = inputs.get_shape()[1:].as_list() - if self._shape.count(-1) > 0: - shape = (-1,) + self._infer_shape(self._input_shape) - else: - shape = (-1,) + self._shape - - if np.prod(self._input_shape) != np.prod(shape[1:]): - raise ValueError("Output shape is incompatible with input shape") - return tf.reshape(inputs, shape) - - @property - def input_shape(self): - self._ensure_is_connected() - return self._input_shape - - # Implements Transposable interface. - def transpose(self, name=None): - """Returns transpose batch reshape.""" - if name is None: - name = self.name + "_transpose" - return BatchReshape(shape=lambda: self.input_shape, name=name) - - -class BatchFlatten(BatchReshape): - """Flattens the input Tensor, preserving the batch dimension.""" - - def __init__(self, name="batch_flatten"): - """Constructs a BatchFlatten module. - - Args: - name: Name of the module. - """ - super(BatchFlatten, self).__init__(name=name, shape=(-1,)) diff --git a/nn/basic_rnn.py b/nn/basic_rnn.py deleted file mode 100644 index fa7e499..0000000 --- a/nn/basic_rnn.py +++ /dev/null @@ -1,286 +0,0 @@ -# Copyright 2016 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Basic RNN Cores for TensorFlow nn. - -This file contains the definitions of the simplest building blocks for Recurrent -Neural Networks. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import collections - -import tensorflow as tf - -from tensorflow.python.framework import tensor_shape -from tensorflow.python.util import nest - -from nn import rnn_core - - -def _get_flat_core_sizes(cores): - """Obtains the list flattened output sizes of a list of cores. - - Args: - cores: list of cores to get the shapes from. - - Returns: - List of lists that, for each core, contains the list of its output - dimensions. - """ - core_sizes_lists = [] - for core in cores: - flat_output_size = nest.flatten(core.output_size) - core_sizes_lists.append([tensor_shape.as_shape(size).as_list() - for size in flat_output_size]) - return core_sizes_lists - - -class DeepRNN(rnn_core.RNNCore): - """RNN core which passes data through a number of internal modules or ops. - - This module is constructed by passing an iterable of externally constructed - modules or ops. The DeepRNN takes `(input, prev_state)` as input and passes - the input through each internal module in the order they were presented, - using elements from `prev_state` as necessary for internal recurrent cores. - The output is `(output, next_state)` in common with other RNN cores. - By default, skip connections from the input to all internal modules and from - each intermediate output to the final output are used. - - E.g.: - - ```python - lin = nn.Linear(hidden_size=128) - tanh = tf.tanh - lstm = nn.LSTM(hidden_size=256) - deep_rnn = nn.DeepRNN([lin, tanh, lstm]) - output, next_state = deep_rnn(input, prev_state) - ``` - - The computation set up inside the DeepRNN has the same effect as: - - ```python - lin_output = lin(input) - tanh_output = tanh(tf.concat(1, [input, lin_output])) - lstm_output, lstm_next_state = lstm( - tf.concat(1, [input, tanh_output]), prev_state[0]) - - next_state = (lstm_next_state,) - output = tf.concat(1, [lin_output, tanh_output, lstm_output]) - ``` - - Every internal module receives the preceding module's output and the entire - core's input. The output is created by concatenating each internal module's - output. In the case of internal recurrent elements, corresponding elements - of the state are used such that `state[i]` is passed to the `i`'th internal - recurrent element. Note that the state of a `DeepRNN` is always a tuple, which - will contain the same number of elements as there are internal recurrent - cores. If no internal modules are recurrent, the state of the DeepRNN as a - whole is the empty tuple. Wrapping non-recurrent modules into a DeepRNN can - be useful to produce something API compatible with a "real" recurrent module, - simplifying code that handles the cores. - - Without skip connections the previous example would become the following - (note the only difference is the addition of `skip_connections=False`): - - ```python - # ... 
declare other modules as above - deep_rnn = nn.DeepRNN([lin, tanh, lstm], skip_connections=False) - output, next_state = deep_rnn(input, prev_state) - ``` - - which is equivalent to: - - ```python - lin_output = lin(input) - tanh_output = tanh(lin_output) - lstm_output, lstm_next_state = lstm(tanh_output, prev_state[0]) - - next_state = (lstm_next_state,) - output = lstm_output - ``` - """ - - def __init__(self, cores, skip_connections=True, name="deep_rnn"): - """Construct a Deep RNN core. - - Args: - cores: iterable of modules or ops. - skip_connections: a boolean that indicates whether to use skip - connections. This means that the input is fed to all the layers, after - being concatenated with the output of the previous layer. The output - of the module will be the concatenation of all the outputs of the - internal modules. - name: name of the module. - - Raises: - ValueError: if `cores` is not an iterable. - """ - super(DeepRNN, self).__init__(name=name) - - if not isinstance(cores, collections.Iterable): - raise ValueError("Cores should be an iterable object.") - self._cores = tuple(cores) - self._skip_connections = skip_connections - - if self._skip_connections: - self._check_cores_output_sizes() - - self._is_recurrent_list = [isinstance(core, rnn_core.RNNCore) - for core in self._cores] - self._num_recurrent = sum(self._is_recurrent_list) - - def _check_cores_output_sizes(self): - """Checks the output_sizes of the cores of the DeepRNN module. - - Raises: - ValueError: if the outputs of the cores cannot be concatenated along their - first dimension. - """ - for core_sizes in zip(*tuple(_get_flat_core_sizes(self._cores))): - first_core_list = core_sizes[0][1:] - for i, core_list in enumerate(core_sizes[1:]): - if core_list[1:] != first_core_list: - raise ValueError("The outputs of the provided cores are not able " - "to be concatenated along the first feature " - "dimension. Core 0 has size %s, whereas Core %d " - "has size %s" % (first_core_list, i, core_list)) - - def _build(self, inputs, prev_state): - """Connects the DeepRNN module into the graph. - - If this is not the first time the module has been connected to the graph, - the Tensors provided as input_ and state must have the same final - dimension, in order for the existing variables to be the correct size for - their corresponding multiplications. The batch size may differ for each - connection. - - Args: - inputs: a nested tuple of Tensors of arbitrary dimensionality, with at - least an initial batch dimension. - prev_state: a tuple of `prev_state`s that corresponds to the state - of each one of the cores of the `DeepCore`. - - Returns: - output: a nested tuple of Tensors of arbitrary dimensionality, with at - least an initial batch dimension. - next_state: a tuple of `next_state`s that corresponds to the updated state - of each one of the cores of the `DeepCore`. - - Raises: - ValueError: if connecting the module into the graph any time after the - first time, and the inferred size of the inputs does not match previous - invocations. This may happen if one connects a module any time after the - first time that does not have the configuration of skip connections as - the first time. 
- """ - current_input = inputs - next_states = [] - outputs = [] - recurrent_idx = 0 - for i, core in enumerate(self._cores): - if self._skip_connections and i > 0: - flat_input = (nest.flatten(inputs), nest.flatten(current_input)) - flat_input = [tf.concat(1, input_) for input_ in zip(*flat_input)] - current_input = nest.pack_sequence_as(structure=inputs, - flat_sequence=flat_input) - - # Determine if this core in the stack is recurrent or not and call - # accordingly. - if self._is_recurrent_list[i]: - current_input, next_state = core(current_input, - prev_state[recurrent_idx]) - next_states.append(next_state) - recurrent_idx += 1 - else: - current_input = core(current_input) - - if self._skip_connections: - outputs.append(current_input) - - if self._skip_connections: - flat_outputs = tuple(nest.flatten(output) for output in outputs) - flat_outputs = [tf.concat(1, output) for output in zip(*flat_outputs)] - output = nest.pack_sequence_as(structure=outputs[0], - flat_sequence=flat_outputs) - else: - output = current_input - - return output, tuple(next_states) - - def initial_state(self, batch_size, dtype=tf.float32, trainable=False, - trainable_initializers=None): - """Builds the default start state for a DeepRNN. - - Args: - batch_size: An int, float or scalar Tensor representing the batch size. - dtype: The data type to use for the state. - trainable: Boolean that indicates whether to learn the initial state. - trainable_initializers: An initializer function or nested structure of - functions with same structure as the `state_size` property of the - core, to be used as initializers of the initial state variable. - - Returns: - A tensor or nested tuple of tensors with same structure and shape as the - `state_size` property of the core. - - Raises: - ValueError: if the number of passed initializers is not the same as the - number of recurrent cores. - """ - initial_state = [] - if trainable_initializers is None: - trainable_initializers = [None] * self._num_recurrent - - num_initializers = len(trainable_initializers) - - if num_initializers != self._num_recurrent: - raise ValueError("The number of initializers and recurrent cores should " - "be the same. Received %d initializers for %d specified " - "recurrent cores." 
- % (num_initializers, self._num_recurrent)) - - recurrent_idx = 0 - for is_recurrent, core in zip(self._is_recurrent_list, self._cores): - if is_recurrent: - with tf.variable_scope("%s-rec_core%d" % (self.name, recurrent_idx)): - core_initial_state = core.initial_state( - batch_size, dtype=dtype, trainable=trainable, - trainable_initializers=trainable_initializers[recurrent_idx]) - initial_state.append(core_initial_state) - recurrent_idx += 1 - return tuple(initial_state) - - @property - def state_size(self): - sizes = [] - for is_recurrent, core in zip(self._is_recurrent_list, self._cores): - if is_recurrent: - sizes.append(core.state_size) - return tuple(sizes) - - @property - def output_size(self): - if self._skip_connections: - output_size = [] - for core_sizes in zip(*tuple(_get_flat_core_sizes(self._cores))): - added_core_size = core_sizes[0] - added_core_size[0] = sum([size[0] for size in core_sizes]) - output_size.append(tf.TensorShape(added_core_size)) - return nest.pack_sequence_as(structure=self._cores[0].output_size, - flat_sequence=output_size) - else: - return self._cores[-1].output_size diff --git a/nn/batch_norm.py b/nn/batch_norm.py deleted file mode 100644 index f7a8b5b..0000000 --- a/nn/batch_norm.py +++ /dev/null @@ -1,499 +0,0 @@ -# Copyright 2016 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Batch normalization module for nn. - -This contains the module BatchNorm, which performs batch normalization on -its inputs. It has an optional post-normalization scale and offset, and it -maintains moving averages of the statistics for use at test time. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from six.moves import xrange -import tensorflow as tf - -from tensorflow.contrib.layers.python.layers import utils -from tensorflow.python.training import moving_averages -from nn import base -from nn import util - - -class BatchNorm(base.AbstractModule): - """Batch normalization module, including optional affine transformation. - - This module maintains exponential moving averages of the mean and - variance, used for calculating more accurate shifted statistics at training - time and optionally used to normalize at test time. - - In order to update the moving averages, the user must run the - ops in the tf.GraphKeys.UPDATE_OPS TensorFlow collection. For example: - - bn = BatchNorm() - train_net = bn(train_inputs, is_training=True) - test_net = bn(test_inputs, is_training=False, test_local_stats=False) - - ... - - update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) - with tf.control_dependencies(update_ops): - train_op = tf.group(train_op) - - Then, whenever `train_op` is run so also are the moving average update ops. - - At training time, batch statistics (mean, variance) are not shared between - separate connections. The moving averages are shared between separate - connections. 
At both training and test time, the optional affine - transformations are shared between separate connections. - - Local batch statistics are used by default at test time, but the moving - averages can be used by specifying a flag when connecting. One often wants - to use local batch statistics at test time to track the progress while the - model is trained as it would ensure that moving average updates do not affect - the training curves. Once the training is finished, it's often advantageous - to use moving average statistics, since it would make evaluation agnostic to - the batch size, and might even lead to small improvements over the local - batch statistics. - """ - - GAMMA = "gamma" - BETA = "beta" - POSSIBLE_INITIALIZER_KEYS = {GAMMA, BETA} - - def __init__(self, reduction_indices=None, offset=True, scale=False, - decay_rate=0.999, eps=1e-3, initializers=None, - use_legacy_moving_second_moment=False, - name="batch_norm"): - """Constructs a BatchNorm module. - - By default reduces over all input tensor dimensions apart from the final - dimension. This has the effect of treating pixels in 1D/2D/3D images as - additional elements of the minibatch. - - If this is not the desired behaviour, the user can specify the tensor - indices to reduce over with `reduction_indices`. - - Args: - reduction_indices: Optional indices of dimensions to reduce over. - offset: Optional boolean to specify whether or not to apply a trained - component-wise bias after the batch normalization and scaling. - scale: Optional boolean to specify whether or not to apply a trained - component-wise scale after the batch normalization. - decay_rate: Decay rate of the exponential moving averages of the mean - and variance. - eps: Small number to avoid dividing by zero when diving by the standard - deviation. - initializers: Optional dict containing ops to initialize the weights of - the affine transform (`gamma` and `beta`). - use_legacy_moving_second_moment: Keep a moving second moment, rather than - the moving variance. This is deprecated, but is kept for backwards - compatability with old checkpoints. By default `False`. - name: Name of the module. - - Raises: - base.Error: If initializers contains any keys other - than `gamma` or `beta`. - ValueError: If `use_legacy_moving_second_moment` is not `True`. - """ - super(BatchNorm, self).__init__(name) - - self._reduction_indices = reduction_indices - self._offset = offset - self._scale = scale - self._decay_rate = decay_rate - self._eps = eps - self._use_legacy_moving_second_moment = use_legacy_moving_second_moment - - self._initializers = util.check_initializers( - initializers, self.POSSIBLE_INITIALIZER_KEYS) - - def _set_default_initializer(self, var_name): - """Sets up a default initializer for a variable if one doesn't exist. - - For the offset (beta), a zeros initializer is used by default. - For the scale (gamma), a ones initializer is used by default. - - Args: - var_name: name of variable as a string. - """ - if var_name not in self._initializers: - if var_name == self.GAMMA: - self._initializers[self.GAMMA] = tf.ones_initializer() - elif var_name == self.BETA: - self._initializers[self.BETA] = tf.zeros_initializer() - - def _build_statistics_variance(self, input_batch, - reduction_indices, use_batch_stats): - """Builds the statistics part of the graph when using moving variance. - - Args: - input_batch: Input batch Tensor. - reduction_indices: Indices of `input_batch` to reduce over. 
- use_batch_stats: Boolean to indicate if batch statistics should be - calculated, otherwise moving averages are returned. - - Returns: - Tuple of (mean, variance). - """ - # Set up our moving statistics. When connecting in parallel, this is shared. - self._moving_mean = tf.get_variable( - "moving_mean", - shape=self._mean_shape, - collections=[tf.GraphKeys.MOVING_AVERAGE_VARIABLES, - tf.GraphKeys.GLOBAL_VARIABLES], - initializer=tf.zeros_initializer(), - trainable=False) - - self._moving_variance = tf.get_variable( - "moving_variance", - shape=self._mean_shape, - collections=[tf.GraphKeys.MOVING_AVERAGE_VARIABLES, - tf.GraphKeys.GLOBAL_VARIABLES], - initializer=tf.ones_initializer(), - trainable=False) - - def build_batch_stats(): - """Builds the batch statistics calculation ops.""" - - # We use the moving mean as an estimate of the mean in order to perform - # a more numerically stable calculation of the batch mean. - # Copy for better stability. - shift = tf.add(self._moving_mean, 0) - counts, shifted_sum_x, shifted_sum_x2, _ = tf.nn.sufficient_statistics( - input_batch, - reduction_indices, - keep_dims=True, - shift=shift, - name="batch_norm_ss") - - mean, variance = tf.nn.normalize_moments(counts, - shifted_sum_x, - shifted_sum_x2, - shift, - name="normalize_moments") - - return mean, variance - - def build_moving_stats(): - return ( - tf.identity(self._moving_mean), - tf.identity(self._moving_variance), - ) - - mean, variance = utils.smart_cond( - use_batch_stats, - build_batch_stats, - build_moving_stats, - ) - - return mean, variance - - def _build_statistics_second_moment(self, input_batch, - reduction_indices, use_batch_stats): - """Builds the statistics part of the graph when using moving second moment. - - Args: - input_batch: Input batch Tensor. - reduction_indices: Indices of `input_batch` to reduce over. - use_batch_stats: Boolean to indicate if batch statistics should be - calculated, otherwise moving averages are returned. - - Returns: - Tuple of (mean, variance, second_moment). - """ - # Set up our moving statistics. When connecting in parallel, this is shared. - self._moving_mean = tf.get_variable( - "moving_mean", - shape=self._mean_shape, - collections=[tf.GraphKeys.MOVING_AVERAGE_VARIABLES, - tf.GraphKeys.GLOBAL_VARIABLES], - initializer=tf.zeros_initializer(), - trainable=False) - - self._moving_second_moment = tf.get_variable( - "moving_second_moment", - shape=self._mean_shape, - collections=[tf.GraphKeys.MOVING_AVERAGE_VARIABLES, - tf.GraphKeys.GLOBAL_VARIABLES], - initializer=tf.ones_initializer(), - trainable=False) - - self._moving_variance = tf.subtract(self._moving_second_moment, - tf.square(self._moving_mean), - name="moving_variance") - - def build_batch_stats(): - """Builds the batch statistics calculation ops.""" - - # Copy for better stability. - # We use the moving mean as an estimate of the mean in order to perform - # a more numerically stable calculation of the batch mean. 
- shift = tf.add(self._moving_mean, 0) - counts, shifted_sum_x, shifted_sum_x2, _ = tf.nn.sufficient_statistics( - input_batch, - reduction_indices, - keep_dims=True, - shift=shift, - name="batch_norm_ss") - - mean, variance = tf.nn.normalize_moments(counts, - shifted_sum_x, - shifted_sum_x2, - shift, - name="normalize_moments") - second_moment = variance + tf.square(mean) - - return mean, variance, second_moment - - def build_moving_stats(): - return ( - tf.identity(self._moving_mean), - tf.identity(self._moving_variance), - tf.identity(self._moving_second_moment), - ) - - mean, variance, second_moment = utils.smart_cond( - use_batch_stats, - build_batch_stats, - build_moving_stats, - ) - - return mean, variance, second_moment - - def _build_update_ops_variance(self, mean, variance, is_training): - """Builds the moving average update ops when using moving variance. - - Args: - mean: The mean value to update with. - variance: The variance value to update with. - is_training: Boolean Tensor to indicate if we're currently in - training mode. - """ - - def build_update_ops(): - """Builds the exponential moving average update ops.""" - - update_mean_op = moving_averages.assign_moving_average( - variable=self._moving_mean, - value=mean, - decay=self._decay_rate, - name="update_moving_mean").op - - update_variance_op = moving_averages.assign_moving_average( - variable=self._moving_variance, - value=variance, - decay=self._decay_rate, - name="update_moving_variance").op - - return update_mean_op, update_variance_op - - def build_no_ops(): - return (tf.no_op(), tf.no_op()) - - # Only make the ops if we know that `is_training=True`, or the value of - # `is_training` is unknown. - is_training_const = utils.constant_value(is_training) - if is_training_const is None or is_training_const: - update_mean_op, update_variance_op = utils.smart_cond( - is_training, - build_update_ops, - build_no_ops, - ) - - # Every new connection creates a new op which adds its contribution - # to the running average when ran. - tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, update_mean_op) - tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, update_variance_op) - - def _build_update_ops_second_moment(self, mean, second_moment, is_training): - """Builds the moving average update ops when using the moving second moment. - - Args: - mean: The mean value to update with. - second_moment: The second_moment value to update with. - is_training: Boolean Tensor to indicate if we're currently in - training mode. - """ - - def build_update_ops(): - """Builds the exponential moving average update ops.""" - - update_mean_op = moving_averages.assign_moving_average( - variable=self._moving_mean, - value=mean, - decay=self._decay_rate, - name="update_moving_mean").op - - update_second_moment_op = moving_averages.assign_moving_average( - variable=self._moving_second_moment, - value=second_moment, - decay=self._decay_rate, - name="update_moving_second_moment").op - - return update_mean_op, update_second_moment_op - - def build_no_ops(): - return (tf.no_op(), tf.no_op()) - - # Only make the ops if we know that `is_training=True`, or the value of - # `is_training` is unknown. - is_training_const = utils.constant_value(is_training) - if is_training_const is None or is_training_const: - update_mean_op, update_second_moment_op = utils.smart_cond( - is_training, - build_update_ops, - build_no_ops, - ) - - # Every new connection creates a new op which adds its contribution - # to the running average when ran. 
- tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, update_mean_op) - tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, update_second_moment_op) - - def _build(self, input_batch, is_training=True, test_local_stats=True): - """Connects the BatchNorm module into the graph. - - Args: - input_batch: A Tensor of arbitrary dimension. By default, the final - dimension is not reduced over when computing the minibatch statistics. - is_training: A boolean to indicate if the module should be connected in - training mode, meaning the moving averages are updated. By default - `True`. Can be a Tensor. - test_local_stats: A boolean to indicate if local batch statistics should - be used when `is_training=False`. If not, moving averages are used. - By default `True`. Can be a Tensor. - - Returns: - A tensor with the same shape as `input_batch`. - - Raises: - base.IncompatibleShapeError: If `reduction_indices` is not valid for the - input shape or has negative entries. - base.NotSupportedError: If `input_batch` has data type of `tf.float16`. - """ - input_shape = input_batch.get_shape() - - if self._reduction_indices is not None: - if len(self._reduction_indices) > len(input_shape): - raise base.IncompatibleShapeError( - "Too many reduction indices specified.") - - if max(self._reduction_indices) >= len(input_shape): - raise base.IncompatibleShapeError( - "Reduction index too large for input shape.") - - if min(self._reduction_indices) < 0: - raise base.IncompatibleShapeError( - "Reduction indeces must be non-negative.") - - reduction_indices = self._reduction_indices - else: - # Reduce over all dimensions except the last. - reduction_indices = range(len(input_shape))[:-1] - - if input_batch.dtype == tf.float16: - raise base.NotSupportedError( - "BatchNorm does not support `tf.float16`, insufficient " - "precision for calculating sufficient statistics.") - - self._mean_shape = input_batch.get_shape().as_list() - for index in reduction_indices: - self._mean_shape[index] = 1 - - use_batch_stats = is_training | test_local_stats - - # Use the legacy moving second moment if the flag is set. - if self._use_legacy_moving_second_moment: - tf.logging.warning( - "nn.BatchNorm `use_legacy_second_moment=True` is deprecated.") - - mean, variance, second_moment = self._build_statistics_second_moment( - input_batch, - reduction_indices, - use_batch_stats) - - self._build_update_ops_second_moment(mean, second_moment, is_training) - else: - mean, variance = self._build_statistics_variance( - input_batch, - reduction_indices, - use_batch_stats) - - self._build_update_ops_variance(mean, variance, is_training) - - # Set up optional scale and offset factors. 
- if self._offset: - self._set_default_initializer(self.BETA) - self._beta = tf.get_variable( - self.BETA, - shape=self._mean_shape, - initializer=self._initializers[self.BETA]) - else: - self._beta = None - - if self._scale: - self._set_default_initializer(self.GAMMA) - self._gamma = tf.get_variable( - self.GAMMA, - shape=self._mean_shape, - initializer=self._initializers[self.GAMMA]) - else: - self._gamma = None - - out = tf.nn.batch_normalization( - input_batch, - mean, - variance, - self._beta, - self._gamma, - self._eps, - name="batch_norm") - - return out - - @property - def moving_mean(self): - self._ensure_is_connected() - return self._moving_mean - - @property - def moving_second_moment(self): - self._ensure_is_connected() - return self._moving_second_moment - - @property - def moving_variance(self): - self._ensure_is_connected() - return self._moving_variance - - @property - def beta(self): - self._ensure_is_connected() - - if self._beta is None: - raise base.Error( - "Batch normalization doesn't have an offset, so no beta") - else: - return self._beta - - @property - def gamma(self): - self._ensure_is_connected() - - if self._gamma is None: - raise base.Error( - "Batch normalization doesn't have a scale, so no gamma") - else: - return self._gamma diff --git a/nn/conv.py b/nn/conv.py deleted file mode 100644 index c41dd75..0000000 --- a/nn/conv.py +++ /dev/null @@ -1,679 +0,0 @@ -# Copyright 2016 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Implementation of convolutional nn modules. - -Classes defining convolutional operations, inheriting from `nn.Module`, with -easy weight sharing. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import collections -import math -import numbers - - -import numpy as np -import tensorflow as tf - -from nn import base -from nn import util - - -SAME = "SAME" -VALID = "VALID" -ALLOWED_PADDINGS = {SAME, VALID} - - -def _fill_shape(x, n): - """Idempotentally converts an integer to a tuple of integers of a given size. - - This is used to allow shorthand notation for various configuration parameters. - A user can provide either, for example, `2` or `[2, 2]` as a kernel shape, and - this function returns `(2, 2)` in both cases. Passing `[1, 2]` will return - `(1, 2)`. - - Args: - x: An integer or an iterable of integers - n: An integer, the size of the desired output list - - Returns: - If `x` is an integer, a tuple of size `n` containing `n` copies of `x`. - If `x` is an iterable of integers of size `n`, it returns `tuple(x)`. - - Raises: - TypeError: If n is not a positive integer; - or if x is neither integer nor an iterable of size n. 
- """ - if not isinstance(n, numbers.Integral) or n < 1: - raise TypeError("n must be a positive integer") - - if isinstance(x, numbers.Integral): - return (x,) * n - elif (isinstance(x, collections.Iterable) and len(x) == n and - all(isinstance(v, numbers.Integral) for v in x)): - return tuple(x) - else: - raise TypeError("x is {}, must be either an integer " - "or an iterable of integers of size {}".format(x, n)) - - -def _fill_and_verify_kernel_shape(x, n): - """Expands x if necessary into a `n`-D kernel shape and reports errors.""" - try: - return _fill_shape(x, n) - except TypeError as e: - raise base.IncompatibleShapeError("Invalid kernel shape: {}".format(e)) - - -def _verify_padding(padding): - """Verifies that the provided padding is supported. Returns padding.""" - if padding not in ALLOWED_PADDINGS: - raise ValueError( - "Padding must be member of '{}', not {}".format( - ALLOWED_PADDINGS, padding)) - return padding - - -def _fill_and_one_pad_stride(stride, n): - """Expands the provided stride to size n and pads it with 1s.""" - try: - return (1,) + _fill_shape(stride, n) + (1,) - except TypeError: - raise base.IncompatibleShapeError( - "stride is {} ({}), must be either an integer or an iterable of " - "integers of size {}".format(stride, type(stride), n)) - - -def create_weight_initializer(fan_in_shape): - """Returns a default initializer for the weights of a convolutional module.""" - stddev = 1 / math.sqrt(np.prod(fan_in_shape)) - return tf.truncated_normal_initializer(stddev=stddev) - - -def create_bias_initializer(bias_shape): - """Returns a default initializer for the biases of a convolutional module.""" - stddev = 1 / math.sqrt(np.prod(bias_shape)) - return tf.truncated_normal_initializer(stddev=stddev) - - -class Conv2D(base.AbstractModule, base.Transposable): - """Spatial convolution and dilated convolution module, including bias. - - This acts as a light wrapper around the TensorFlow ops `tf.nn.conv2d` and - `tf.nn.atrous_conv2d`, abstracting away variable creation and sharing. - - The current implementation of `tf.nn.atrous_conv2d` does not easily permit for - strides > 1 when performing dilated convolution (see b/29893301). Therefore, - strides > 1 are currently disabled if the rate is set > 1. - """ - - def __init__(self, output_channels, kernel_shape, stride=1, rate=1, - padding=SAME, use_bias=True, initializers=None, mask=None, - name="conv_2d"): - """Constructs a Conv2D module. - - See the following documentation for an explanation of VALID versus SAME - padding modes: - https://www.tensorflow.org/versions/r0.8/api_docs/python/nn.html#convolution - - Args: - output_channels: Number of output channels. `output_channels` can be - either a number or a callable. In the latter case, since the function - invocation is deferred to graph construction time, the user must only - ensure that output_channels can be called, returning an integer, - when `_build` is called. - kernel_shape: List of kernel sizes, or integer that is used to define - kernel size in all dimensions. - stride: List of kernel strides, or integer that is used to define - stride in all dimensions. - rate: A positive integer, `rate=1` corresponds to standard 2D convolution, - `rate > 1` corresponds to dilated convolution. - padding: Padding algorithm, either `nn.SAME` or `nn.VALID`. - use_bias: Whether to include bias parameters. Default `True`. - initializers: Optional dict containing ops to initialize the filters (with - key 'w') or biases (with key 'b'). 
The default initializers are - truncated normal initializers, which are commonly used when the inputs - are zero centered (see https://arxiv.org/pdf/1502.03167v3.pdf). - mask: Optional 2D or 4D array, tuple or numpy array containing values to - multiply the weights by component-wise. - name: Name of the module. - - Raises: - base.IncompatibleShapeError: If the given kernel shape is not an integer; - or if the given kernel shape is not a sequence of two integers. - base.IncompatibleShapeError: If the given stride is not an integer; or if - the given stride is not a sequence of two or four integers. - base.IncompatibleShapeError: If a mask is given and its rank is neither 2 - nor 4. - base.NotSupportedError: If the given dilation rate is not a positive - integer. - base.NotSupportedError: If rate > 1 and the stride in any dimension is - > 1. - ValueError: If the given padding is not `nn.VALID` or `nn.SAME`. - KeyError: If initializers contains any keys other than 'w' or 'b'. - TypeError: If any of the given initializers are not callable. - TypeError: If mask is given and is not an array, tuple or a numpy array. - """ - super(Conv2D, self).__init__(name=name) - - self._output_channels = output_channels - self._input_shape = None - self._kernel_shape = _fill_and_verify_kernel_shape(kernel_shape, 2) - try: - self._stride = (1,) + _fill_shape(stride, 2) + (1,) - except TypeError as e: - # We want to support passing native strides akin to [1, m, n, 1]. - if len(stride) == 4: - self._stride = tuple(stride) - else: - raise base.IncompatibleShapeError("Invalid stride: {}".format(e)) - - if not isinstance(rate, numbers.Integral) or rate < 1: - raise base.NotSupportedError( - "Rate, {}, must be integer >= 1".format(rate)) - elif any(x > 1 for x in self._stride) and rate > 1: - raise base.NotSupportedError( - "Cannot have stride > 1 with rate > 1") - else: - self._rate = rate - - self._padding = _verify_padding(padding) - self._use_bias = use_bias - self.possible_keys = self.get_possible_initializer_keys(use_bias=use_bias) - self._initializers = util.check_initializers( - initializers, self.possible_keys) - - if mask is not None: - if not isinstance(mask, (list, tuple, np.ndarray)): - raise TypeError("Invalid type for mask: {}".format(type(mask))) - self._mask = np.asanyarray(mask) - mask_rank = mask.ndim - if mask_rank != 2 and mask_rank != 4: - raise base.IncompatibleShapeError( - "Invalid mask rank: {}".format(mask_rank)) - else: - self._mask = None - - @classmethod - def get_possible_initializer_keys(cls, use_bias=True): - return {"w", "b"} if use_bias else {"w"} - - def _build(self, inputs): - """Connects the Conv2D module into the graph, with input Tensor `inputs`. - - If this is not the first time the module has been connected to the graph, - the input Tensor provided here must have the same final 3 dimensions, in - order for the existing variables to be the correct size for the - multiplication. The batch size may differ for each connection. - - Args: - inputs: A 4D Tensor of shape [batch_size, input_height, input_width, - input_channels]. - - Returns: - A 4D Tensor of shape [batch_size, output_height, output_width, - output_channels]. - - Raises: - ValueError: If connecting the module into the graph any time after the - first time and the inferred size of the input does not match previous - invocations. - base.IncompatibleShapeError: If the input tensor has the wrong number - of dimensions. 
- base.IncompatibleShapeError: If a mask is present and its shape is - incompatible with the shape of the weights. - base.UnderspecifiedError: If the input tensor has an unknown - `input_channels`. - base.UnderspecifiedError: If rate > 1 is used with an input tensor with - unknown `input_width` or `input_height`. - TypeError: If input Tensor dtype is not `tf.float32`. - """ - # Handle input whose shape is unknown during graph creation. - self._input_shape = tuple(inputs.get_shape().as_list()) - - if len(self._input_shape) != 4: - raise base.IncompatibleShapeError( - "Input Tensor must have shape (batch_size, input_height, input_" - "width, input_channels)") - - if self._input_shape[3] is None: - raise base.UnderSpecifiedError( - "Number of input channels must be known at module build time") - else: - input_channels = self._input_shape[3] - - if inputs.dtype != tf.float32: - raise TypeError( - "Input must have dtype tf.float32, but dtype was {}".format( - inputs.dtype)) - - weight_shape = ( - self._kernel_shape[0], - self._kernel_shape[1], - input_channels, - self.output_channels) - - bias_shape = (self.output_channels,) - - if "w" not in self._initializers: - self._initializers["w"] = create_weight_initializer(weight_shape[:3]) - - if "b" not in self._initializers and self._use_bias: - self._initializers["b"] = create_bias_initializer(bias_shape) - - self._w = tf.get_variable("w", - shape=weight_shape, - initializer=self._initializers["w"]) - - w = self._w - - if self._mask is not None: - mask_rank = self._mask.ndim - mask_shape = self._mask.shape - if mask_rank == 2: - if mask_shape != self._kernel_shape: - raise base.IncompatibleShapeError( - "Invalid mask shape: {}".format(mask_shape)) - mask = np.reshape(self._mask, self._kernel_shape + (1, 1)) - elif mask_rank == 4: - if mask_shape != tuple(weight_shape): - raise base.IncompatibleShapeError( - "Invalid mask shape: {}".format(mask_shape)) - mask = self._mask - mask_tensor, = tf.py_func(lambda: mask, [], [w.dtype], stateful=False) - mask_tensor.set_shape(weight_shape) - w *= mask - - if self._rate > 1: - if any(x is None for x in self._input_shape[1:-1]): - raise base.UnderspecifiedError( - "Can't use atrous convolutions with unknown input_width or " - "input_height at graph build time") - outputs = tf.nn.atrous_conv2d(inputs, - w, - rate=self._rate, - padding=self._padding) - else: - outputs = tf.nn.conv2d(inputs, - w, - strides=self._stride, - padding=self._padding) - - if self._use_bias: - self._b = tf.get_variable("b", - shape=bias_shape, - initializer=self._initializers["b"]) - outputs += self._b - - return outputs - - @property - def output_channels(self): - """Returns the number of output channels.""" - if callable(self._output_channels): - self._output_channels = self._output_channels() - return self._output_channels - - @property - def kernel_shape(self): - """Returns the kernel shape.""" - return self._kernel_shape - - @property - def stride(self): - """Returns the stride.""" - return self._stride - - @property - def rate(self): - """Returns the dilation rate.""" - return self._rate - - @property - def padding(self): - """Returns the padding algorithm.""" - return self._padding - - @property - def w(self): - """Returns the Variable containing the weight matrix.""" - self._ensure_is_connected() - return self._w - - @property - def b(self): - """Returns the Variable containing the bias. - - Returns: - Variable object containing the bias, from the most recent __call__. 
- - Raises: - base.NotConnectedError: If the module has not been connected to the graph - yet, meaning the variables do not exist. - AttributeError: If the module does not use bias. - """ - self._ensure_is_connected() - if not self._use_bias: - raise AttributeError( - "No bias Variable in Conv2D Module when `use_bias=False`.") - return self._b - - @property - def has_bias(self): - """Returns `True` if bias Variable is present in the module.""" - return self._use_bias - - @property - def initializers(self): - """Returns the initializers dictionary.""" - return self._initializers - - # Implements Transposable interface. - @property - def input_shape(self): - """Returns the input shape.""" - self._ensure_is_connected() - return self._input_shape - - # Implements Transposable interface. - def transpose(self, name=None): - """Returns matching `Conv2DTranspose` module. - - Args: - name: Optional string assigning name of transpose module. The default name - is constructed by appending "_transpose" to `self.name`. - - Returns: - `Conv2DTranspose` module. - - Raises: - base.NotSupportedError: If `rate > 1`. - """ - if self._rate > 1: - raise base.NotSupportedError( - "Cannot transpose a dilated convolution module.") - - if name is None: - name = self.name + "_transpose" - return Conv2DTranspose(output_channels=lambda: self.input_shape[-1], - output_shape=lambda: self.input_shape[1:3], - kernel_shape=self.kernel_shape, - stride=self.stride, - padding=self.padding, - use_bias=self._use_bias, - initializers=self.initializers, - name=name) - - -class Conv2DTranspose(base.AbstractModule, base.Transposable): - """Spatial transposed / reverse / up 2D convolution module, including bias. - - This acts as a light wrapper around the TensorFlow op `tf.nn.conv2d_transpose` - abstracting away variable creation and sharing. - """ - - def __init__(self, output_channels, output_shape, kernel_shape, stride=1, - padding=SAME, use_bias=True, initializers=None, - name="conv_2d_transpose"): - """Constructs a `Conv2DTranspose module`. - - See the following documentation for an explanation of VALID versus SAME - padding modes: - https://www.tensorflow.org/versions/r0.8/api_docs/python/nn.html#convolution - - Args: - output_channels: Number of output channels. - Can be either a number or a callable. In the latter case, since the - function invocation is deferred to graph construction time, the user - must only ensure `output_channels` can be called, returning an - integer, when build is called. - output_shape: Output shape of transpose convolution. - Can be either an iterable of integers or a callable. In the latter - case, since the function invocation is deferred to graph construction - time, the user must only ensure that `output_shape` can be called, - returning an iterable of format `(out_height, out_width)` when - `_build` is called. Note that `output_shape` defines the size of - output signal domain, as opposed to the shape of the output `Tensor`. - kernel_shape: List of kernel sizes, must be length 2. - stride: List of kernel strides. - padding: Padding algorithm, either `nn.SAME` or `nn.VALID`. - use_bias: Whether to include bias parameters. Default `True`. - initializers: Optional dict containing ops to initialize the filters (with - key 'w') or biases (with key 'b'). - name: Name of the module. - - Raises: - base.IncompatibleShapeError: If the given kernel shape is neither an - integer nor a sequence of two integers. 
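The `Conv2D` class removed above implements the `Transposable` interface, so a connected convolution can manufacture its matching deconvolution. A short sketch of that pattern with the Sonnet replacement, assuming `snt.Conv2D` exposes the same `transpose()` method and `snt.SAME` padding constant as the deleted code; the shapes below are illustrative.

```python
import sonnet as snt
import tensorflow as tf

# Sketch: a convolution and its matching transposed module, mirroring the
# Transposable interface of the deleted nn.Conv2D.
images = tf.placeholder(tf.float32, shape=[None, 32, 32, 3])

conv = snt.Conv2D(output_channels=16, kernel_shape=3, stride=2,
                  padding=snt.SAME, name="encoder_conv")
features = conv(images)            # [batch, 16, 16, 16]

# transpose() builds a Conv2DTranspose whose output shape and channels are
# resolved lazily from conv.input_shape, so conv is connected first.
deconv = conv.transpose(name="decoder_conv")
reconstruction = deconv(features)  # [batch, 32, 32, 3]
```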
- base.IncompatibleShapeError: If the given stride is neither an integer nor - a sequence of two or four integers. - ValueError: If the given padding is not `nn.VALID` or `nn.SAME`. - KeyError: If `initializers` contains any keys other than 'w' or 'b'. - TypeError: If any of the given initializers are not callable. - """ - super(Conv2DTranspose, self).__init__(name) - - self._output_channels = output_channels - if callable(output_shape): - self._output_shape = output_shape - else: - self._output_shape = tuple(output_shape) - self._input_shape = None - - self._kernel_shape = _fill_and_verify_kernel_shape(kernel_shape, 2) - # We want to support passing native strides akin to [1, m, n, 1]. - if isinstance(stride, collections.Iterable) and len(stride) == 4: - if not stride[0] == stride[3] == 1: - raise base.IncompatibleShapeError( - "Invalid stride: First and last element must be 1.") - self._stride = tuple(stride) - else: - self._stride = _fill_and_one_pad_stride(stride, 2) - - self._padding = _verify_padding(padding) - self._use_bias = use_bias - self.possible_keys = self.get_possible_initializer_keys(use_bias=use_bias) - self._initializers = util.check_initializers( - initializers, self.possible_keys) - - @classmethod - def get_possible_initializer_keys(cls, use_bias=True): - return {"w", "b"} if use_bias else {"w"} - - def _build(self, inputs): - """Connects the Conv2DTranspose module into the graph. - - If this is not the first time the module has been connected to the graph, - the input Tensor provided here must have the same final 3 dimensions, in - order for the existing variables to be the correct size for the - multiplication. The batch size may differ for each connection. - - Args: - inputs: A 4D Tensor of shape [batch_size, input_height, input_width, - input_channels]. - - Returns: - A 4D Tensor of shape [batch_size, output_height, output_width, - output_channels]. - - Raises: - ValueError: If connecting the module into the graph any time after the - first time and the inferred size of the input does not match previous - invocations. - base.IncompatibleShapeError: If the input tensor has the wrong number of - dimensions; or if the input tensor has an unknown `input_channels`; or - or if `output_shape` is an iterable and is not in the format - `(out_height, out_width)`. - TypeError: If input Tensor dtype is not `tf.float32`. - """ - # Handle input whose shape is unknown during graph creation. 
- self._input_shape = tuple(inputs.get_shape().as_list()) - - if len(self._input_shape) != 4: - raise base.IncompatibleShapeError( - "Input Tensor must have shape (batch_size, input_height, " - "input_width, input_channels)") - - if self._input_shape[3] is None: - raise base.IncompatibleShapeError( - "Number of input channels must be known at module build time") - input_channels = self._input_shape[3] - - if inputs.dtype != tf.float32: - raise TypeError("Input must have dtype tf.float32, but dtype was " + - inputs.dtype) - - if len(self.output_shape) != 2: - raise base.IncompatibleShapeError("Output shape must be specified as " - "(output_height, output_width)") - - weight_shape = (self._kernel_shape[0], self._kernel_shape[1], - self.output_channels, input_channels) - - bias_shape = (self.output_channels,) - - if "w" not in self._initializers: - fan_in_shape = weight_shape[:2] + (weight_shape[3],) - self._initializers["w"] = create_weight_initializer(fan_in_shape) - - if "b" not in self._initializers and self._use_bias: - self._initializers["b"] = create_bias_initializer(bias_shape) - - self._w = tf.get_variable("w", - shape=weight_shape, - initializer=self._initializers["w"]) - - # Use tensorflow shape op to manipulate inputs shape, so that unknown batch - # size - which can happen when using input placeholders - is handled - # correcly. - batch_size = tf.expand_dims(tf.shape(inputs)[0], 0) - conv_output_shape = tf.convert_to_tensor( - tuple(self.output_shape) + (self.output_channels,)) - output_shape = tf.concat(0, [batch_size, conv_output_shape]) - - outputs = tf.nn.conv2d_transpose(inputs, - self._w, - output_shape, - strides=self._stride, - padding=self._padding) - - if self._use_bias: - self._b = tf.get_variable("b", - shape=bias_shape, - initializer=self._initializers["b"]) - outputs += self._b - - # Recover output tensor shape value and pass it to set_shape in order to - # enable shape inference. - batch_size_value = inputs.get_shape()[0] - output_shape_value = ((batch_size_value,) + self.output_shape + - (self.output_channels,)) - outputs.set_shape(output_shape_value) - - return outputs - - @property - def output_channels(self): - """Returns the number of output channels.""" - if callable(self._output_channels): - self._output_channels = self._output_channels() - return self._output_channels - - @property - def kernel_shape(self): - """Returns the kernel shape.""" - return self._kernel_shape - - @property - def stride(self): - """Returns the stride.""" - return self._stride - - @property - def output_shape(self): - """Returns the output shape.""" - if callable(self._output_shape): - self._output_shape = tuple(self._output_shape()) - return self._output_shape - - @property - def padding(self): - """Returns the padding algorithm.""" - return self._padding - - @property - def w(self): - """Returns the Variable containing the weight matrix.""" - self._ensure_is_connected() - return self._w - - @property - def b(self): - """Returns the Variable containing the bias. - - Returns: - Variable object containing the bias, from the most recent __call__. - - Raises: - base.NotConnectedError: If the module has not been connected to the graph - yet, meaning the variables do not exist. - AttributeError: If the module does not use bias. 
- """ - self._ensure_is_connected() - if not self._use_bias: - raise AttributeError( - "No bias Variable in Conv2DTranspose Module when `use_bias=False`.") - return self._b - - @property - def has_bias(self): - """Returns `True` if bias Variable is present in the module.""" - return self._use_bias - - @property - def initializers(self): - """Returns the initializers dictionary.""" - return self._initializers - - # Implements Transposable interface. - @property - def input_shape(self): - """Returns the input shape.""" - self._ensure_is_connected() - return self._input_shape - - # Implements Transposable interface. - def transpose(self, name=None): - """Returns matching `Conv2D` module. - - Args: - name: Optional string assigning name of transpose module. The default name - is constructed by appending "_transpose" to `self.name`. - - Returns: - `Conv2D` module. - """ - if name is None: - name = self.name + "_transpose" - return Conv2D(output_channels=lambda: self.input_shape[-1], - kernel_shape=self.kernel_shape, - stride=self.stride, - padding=self.padding, - use_bias=self._use_bias, - initializers=self.initializers, - name=name) diff --git a/nn/convnet.py b/nn/convnet.py deleted file mode 100644 index 7a43ddf..0000000 --- a/nn/convnet.py +++ /dev/null @@ -1,446 +0,0 @@ -# Copyright 2016 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""A minimal interface convolutional networks module.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import collections - -from six.moves import xrange -import tensorflow as tf - -from nn import base -from nn import batch_norm -from nn import conv -from nn import util - - -def _replicate_elements(input_iterable, num_times): - """Replicates entry in `input_iterable` if `input_iterable` is of length 1.""" - if len(input_iterable) == 1: - return (input_iterable[0],) * num_times - return tuple(input_iterable) - - -class ConvNet2D(base.AbstractModule, base.Transposable): - """A 2D Convolutional Network module.""" - - POSSIBLE_INITIALIZER_KEYS = {"w", "b"} - - def __init__(self, - output_channels, - kernel_shapes, - strides, - paddings, - activation=tf.nn.relu, - activate_final=False, - initializers=None, - use_batch_norm=False, - use_bias=True, - batch_norm_config=None, - name="conv_net_2d"): - """Constructs a `ConvNet2D` module. - - By default, neither batch normalization nor activation are applied to the - output of the final layer. - - Args: - output_channels: Iterable of output channels, as defined in - `conv.Conv2D`. Output channels can be defined either as number or via a - callable. In the latter case, since the function invocation is deferred - to graph construction time, the user must only ensure that entries can - be called when build is called. Each entry in the iterable defines - properties in the corresponding convolutional layer. 
- kernel_shapes: Iterable of kernel sizes as defined in `conv.Conv2D`; if - the list contains one element only, the same kernel shape is used in - each layer of the network. - strides: Iterable of kernel strides as defined in `conv.Conv2D`; if the - list contains one element only, the same stride is used in each layer of - the network. - paddings: Iterable of padding options, either `nn.SAME` or - `nn.VALID`; if the Iterable contains one element only, the same padding - is used in each layer of the network. - activation: An activation op. - activate_final: Boolean determining if the activation and batch - normalization, if turned on, are applied to the final layer. - initializers: Optional dict containing ops to initialize the filters of - the whole network (with key 'w') or biases (with key 'b'). - use_batch_norm: Boolean determining if batch normalization is applied - after convolution. - use_bias: Whether to include bias parameters in the convolutional layers. - Default `True`. - batch_norm_config: Optional mapping of additional configuration for the - `nn.BatchNorm` modules. - name: Name of the module. - - Raises: - TypeError: If `output_channels` is not iterable; or if `kernel_shapes` is - not iterable; or `strides` is not iterable; or `paddings` is not - iterable; or if `activation` is not callable; or `batch_norm_config` is - not a mappable (e.g. `dict`). - ValueError: If `output_channels` is empty; or if `kernel_shapes` has not - length 1 or `len(output_channels)`; or if `strides` has not - length 1 or `len(output_channels)`; or if `paddings` has not - length 1 or `len(output_channels)`. - Error: If initializers contains any keys other than 'w' or 'b'. - """ - if not isinstance(output_channels, collections.Iterable): - raise TypeError("output_channels must be iterable") - output_channels = tuple(output_channels) - - if not isinstance(kernel_shapes, collections.Iterable): - raise TypeError("kernel_shapes must be iterable") - kernel_shapes = tuple(kernel_shapes) - - if not isinstance(strides, collections.Iterable): - raise TypeError("strides must be iterable") - strides = tuple(strides) - - if not isinstance(paddings, collections.Iterable): - raise TypeError("paddings must be iterable") - paddings = tuple(paddings) - - super(ConvNet2D, self).__init__(name) - - if not output_channels: - raise ValueError("output_channels must not be empty") - self._output_channels = tuple(output_channels) - self._num_layers = len(self._output_channels) - - self._input_shape = None - - self._initializers = util.check_initializers( - initializers, self.POSSIBLE_INITIALIZER_KEYS) - - if not callable(activation): - raise TypeError("Input 'activation' must be callable") - self._activation = activation - self._activate_final = activate_final - - self._kernel_shapes = _replicate_elements(kernel_shapes, self._num_layers) - if len(self._kernel_shapes) != self._num_layers: - raise ValueError( - "kernel_shapes must be of length 1 or len(output_channels)") - - self._strides = _replicate_elements(strides, self._num_layers) - if len(self._strides) != self._num_layers: - raise ValueError( - """strides must be of length 1 or len(output_channels)""") - - self._paddings = _replicate_elements(paddings, self._num_layers) - if len(self._paddings) != self._num_layers: - raise ValueError( - """paddings must be of length 1 or len(output_channels)""") - - self._use_batch_norm = use_batch_norm - - if batch_norm_config is not None: - if not isinstance(batch_norm_config, collections.Mapping): - raise 
TypeError("`batch_norm_config` must be a mapping, e.g. `dict`.") - self._batch_norm_config = batch_norm_config - else: - self._batch_norm_config = {} - - self._use_bias = use_bias - self._instantiate_layers() - - def _instantiate_layers(self): - """Instantiates all the convolutional modules used in the network.""" - - with tf.variable_scope(self._template.variable_scope): - self._layers = tuple(conv.Conv2D(name="conv_2d_{}".format(i), - output_channels=self._output_channels[i], - kernel_shape=self._kernel_shapes[i], - stride=self._strides[i], - padding=self._paddings[i], - use_bias=self._use_bias, - initializers=self._initializers) - for i in xrange(self._num_layers)) - - def _build(self, inputs, is_training=True, test_local_stats=True): - """Assembles the `ConvNet2D` and connects it to the graph. - - Args: - inputs: A 4D Tensor of shape `[batch_size, input_height, input_width, - input_channels]`. - is_training: Boolean to indicate to `nn.BatchNorm` if we are - currently training. By default `True`. - test_local_stats: Boolean to indicate to `nn.BatchNorm` if batch - normalization should use local batch statistics at test time. - By default `True`. - - Returns: - A 4D Tensor of shape `[batch_size, output_height, output_width, - output_channels[-1]]`. - """ - self._input_shape = tuple(inputs.get_shape().as_list()) - net = inputs - - final_index = len(self._layers) - 1 - for i, layer in enumerate(self._layers): - net = layer(net) - - if i != final_index or self._activate_final: - if self._use_batch_norm: - bn = batch_norm.BatchNorm(name="batch_norm_{}".format(i), - **self._batch_norm_config) - net = bn(net, - is_training=is_training, - test_local_stats=test_local_stats) - - net = self._activation(net) - - return net - - @property - def layers(self): - """Returns a tuple containing the convolutional layers of the network.""" - return self._layers - - @property - def strides(self): - return self._strides - - @property - def paddings(self): - return self._paddings - - @property - def kernel_shapes(self): - return self._kernel_shapes - - @property - def output_channels(self): - return tuple([l() if callable(l) else l for l in self._output_channels]) - - @property - def use_bias(self): - return self._use_bias - - @property - def use_batch_norm(self): - return self._use_batch_norm - - @property - def activate_final(self): - return self._activate_final - - # Implements Transposable interface. - @property - def input_shape(self): - """Returns shape of input `Tensor` passed at last call to `_build`.""" - self._ensure_is_connected() - return self._input_shape - - # Implements Transposable interface. - def transpose(self, name=None, output_channels=None): - """Returns transposed conv net. - - Args: - name: Optional string specifiying the name of the transposed module. The - default name is constructed by appending "_transpose" to `self.name`. - output_channels: Optional iterable of numbers of output channels. - - Returns: - Matching `ConvNetTranspose2D` module. - - Raises: - ValueError: If output_channels is specified and its length does not match - the number of layers. 
- """ - if name is None: - name = self.name + "_transpose" - - if output_channels is None: - output_channels = [] - for layer in reversed(self._layers): - output_channels.append(lambda l=layer: l.input_shape[-1]) - - elif len(output_channels) != len(self._layers): - raise ValueError("Iterable output_channels length must match the" - "number of layers ({}), but is {} instead.".format( - len(self._layers), len(output_channels))) - - output_shapes = [] - for layer in reversed(self._layers): - output_shapes.append(lambda l=layer: l.input_shape[1:-1]) - - return ConvNet2DTranspose(name=name, - output_channels=output_channels, - output_shapes=output_shapes, - kernel_shapes=reversed(self.kernel_shapes), - strides=reversed(self.strides), - paddings=reversed(self.paddings), - activation=self._activation, - activate_final=self._activate_final, - initializers=self._initializers, - use_batch_norm=self._use_batch_norm, - use_bias=self._use_bias, - batch_norm_config=self._batch_norm_config) - - -class ConvNet2DTranspose(ConvNet2D): - """A 2D Transpose-Convolutional Network module.""" - - def __init__(self, - output_channels, - output_shapes, - kernel_shapes, - strides, - paddings, - activation=tf.nn.relu, - activate_final=False, - initializers=None, - use_batch_norm=False, - use_bias=True, - batch_norm_config=None, - name="conv_net_2d_transpose"): - """Constructs a `ConvNetTranspose2D` module. - - `output_{shapes,channels}` can be defined either as iterable of - {iterables,integers} or via a callable. In the latter case, since the - function invocation is deferred to graph construction time, the user - must only ensure that entries can be called returning meaningful values when - build is called. Each entry in the iterable defines properties in the - corresponding convolutional layer. - - By default, neither batch normalization nor activation are applied to the - output of the final layer. - - Args: - output_channels: Iterable of numbers of output channels. - output_shapes: Iterable of output shapes as defined in - `conv.conv2DTranpose`; if the iterable contains one element only, the - same shape is used in each layer of the network. - kernel_shapes: Iterable of kernel sizes as defined in `conv.Conv2D`; if - the list contains one element only, the same kernel shape is used in - each layer of the network. - strides: Iterable of kernel strides as defined in `conv.Conv2D`; if the - list contains one element only, the same stride is used in each layer of - the network. - paddings: Iterable of padding options, either `nn.SAME` or - `nn.VALID`; if the Iterable contains one element only, the same padding - is used in each layer of the network. - activation: An activation op. - activate_final: Boolean determining if the activation and batch - normalization, if turned on, are applied to the final layer. - initializers: Optional dict containing ops to initialize the filters of - the whole network (with key 'w') or biases (with key 'b'). - use_batch_norm: Boolean determining if batch normalization is applied - after convolution. - use_bias: Whether to include bias parameters in the convolutional layers. - Default `True`. - batch_norm_config: Optional mapping of additional configuration for the - `nn.BatchNorm` modules. - name: Name of the module. - - Raises: - TypeError: If `output_channels` is not iterable; or if `output_channels` - is not iterable; or if `kernel_shapes` is not iterable; or `strides` is - not iterable; or `paddings` is not iterable; or if `activation` is not - callable. 
- ValueError: If `output_channels` is empty; or if `kernel_shapes` has not - length 1 or `len(output_channels)`; or if `strides` has not - length 1 or `len(output_channels)`; or if `paddings` has not - length 1 or `len(output_channels)`. - Error: If initializers contains any keys other than 'w' or 'b'. - """ - if not isinstance(output_channels, collections.Iterable): - raise TypeError("output_channels must be iterable") - output_channels = tuple(output_channels) - num_layers = len(output_channels) - - if not isinstance(output_shapes, collections.Iterable): - raise TypeError("output_shapes must be iterable") - output_shapes = tuple(output_shapes) - - self._output_shapes = _replicate_elements(output_shapes, num_layers) - if len(self._output_shapes) != num_layers: - raise ValueError( - "output_shapes must be of length 1 or len(output_channels)") - - super(ConvNet2DTranspose, self).__init__( - output_channels, - kernel_shapes, - strides, - paddings, - activation=activation, - activate_final=activate_final, - initializers=initializers, - use_batch_norm=use_batch_norm, - use_bias=use_bias, - batch_norm_config=batch_norm_config, - name=name) - - def _instantiate_layers(self): - """Instantiates all the convolutional modules used in the network.""" - - with tf.variable_scope(self._template.variable_scope): - self._layers = tuple( - conv.Conv2DTranspose(name="conv_2d_transpose_{}".format(i), - output_channels=self._output_channels[i], - output_shape=self._output_shapes[i], - kernel_shape=self._kernel_shapes[i], - stride=self._strides[i], - padding=self._paddings[i], - initializers=self._initializers, - use_bias=self._use_bias) - for i in xrange(self._num_layers)) - - @property - def output_shapes(self): - return tuple([l() if callable(l) else l for l in self._output_shapes]) - - # Implement Transposable interface. - def transpose(self, name=None, output_channels=None): - """Returns transposed conv net. - - Args: - name: Optional string specifiying the name of the transposed module. The - default name is constructed by appending "_transpose" to `self.name`. - output_channels: Optional iterable of numbers of output channels. - - Returns: - Matching `ConvNetTranspose2D` module. - - Raises: - ValueError: If output_channels is specified and its length does not match - the number of layers. - """ - if name is None: - name = self.name + "_transpose" - - if output_channels is None: - output_channels = [] - for layer in reversed(self._layers): - output_channels.append(lambda l=layer: l.input_shape[-1]) - - elif len(output_channels) != len(self._layers): - raise ValueError("Iterable output_channels length must match the" - "number of layers ({}), but is {} instead.".format( - len(self._layers), len(output_channels))) - - return ConvNet2D(name=name, - output_channels=output_channels, - kernel_shapes=reversed(self.kernel_shapes), - strides=reversed(self.strides), - paddings=reversed(self.paddings), - activation=self._activation, - activate_final=self._activate_final, - initializers=self._initializers, - use_batch_norm=self._use_batch_norm, - use_bias=self._use_bias, - batch_norm_config=self._batch_norm_config) diff --git a/nn/gated_rnn.py b/nn/gated_rnn.py deleted file mode 100644 index a84373d..0000000 --- a/nn/gated_rnn.py +++ /dev/null @@ -1,592 +0,0 @@ -# Copyright 2016 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
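The `ConvNet2D` / `ConvNet2DTranspose` stacks removed above (note that single-element `kernel_shapes`, `strides`, and `paddings` iterables are replicated across all layers) also live on in Sonnet. A minimal sketch, assuming the port is exposed as `snt.nets.ConvNet2D` with the constructor shown above; layer sizes are illustrative.

```python
import sonnet as snt
import tensorflow as tf

# Sketch: a three-layer conv stack; one-element kernel/stride/padding lists
# are replicated across all layers, as in the deleted module.
net = snt.nets.ConvNet2D(output_channels=[16, 32, 64],
                         kernel_shapes=[3],
                         strides=[2],
                         paddings=[snt.SAME],
                         activation=tf.nn.relu,
                         activate_final=False,
                         name="encoder")

images = tf.placeholder(tf.float32, shape=[None, 64, 64, 3])
features = net(images)   # [batch, 8, 8, 64]

# The Transposable interface gives a matching decoder network.
decoder = net.transpose(name="decoder")
reconstruction = decoder(features)  # back to [batch, 64, 64, 3]
```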
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""LSTM based modules for TensorFlow nn. - -This python module contains LSTM-like cores that fall under the broader group -of RNN cores. In general, initializers for the gate weights and other -model parameters may be passed to the constructor. - -Typical usage example of the standard LSTM without peephole connections: - - ``` - import nn - - - hidden_size = 10 - batch_size = 2 - - # Simple LSTM op on some input - rnn = nn.LSTM(hidden_size) - input = tf.placeholder(tf.float32, shape=[batch_size, hidden_size]) - out, next_state = rnn(input, rnn.initial_state()) - ``` -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - -from six.moves import xrange # pylint: disable=redefined-builtin -import tensorflow as tf - -from tensorflow.python.ops import array_ops -from tensorflow.contrib import rnn - -from nn import base -from nn import basic -from nn import batch_norm -from nn import rnn_core -from nn import util - - -class LSTM(rnn_core.RNNCore): - """LSTM recurrent network cell with optional peepholes & batch normalization. - - The base implementation is based on: http://arxiv.org/abs/1409.2329. We add - forget_bias (default: 1) to the biases of the forget gate in order to - reduce the scale of forgetting in the beginning of the training. - - #### Peep-hole connections - - Peep-hole connections may optionally be used by specifying a flag in the - constructor. These connections can aid increasing the precision of output - timing, for more details see: - - https://research.google.com/pubs/archive/43905.pdf - - #### Batch normalization - - The batch norm transformation (in training mode) is - batchnorm(x) = gamma * (x - mean(x)) / stddev(x) + beta, - where gamma is a learnt scaling factor and beta is a learnt offset. - - Batch normalization may optionally be used at different places in the LSTM by - specifying flag(s) in the constructor. These are applied when calculating - the gate activations and cell-to-hidden transformation. The set-up is based on - - https://arxiv.org/pdf/1603.09025.pdf - - ##### Batch normalization: where to apply? - - Batch norm can be applied in three different places in the LSTM: - - (h) To the W_h h_{t-1} contribution to the gates from the previous hiddens. - (x) To the W_x x_t contribution to the gates from the current input. - (c) To the cell value c_t when calculating the output h_t from the cell. - - (The notation here is consistent with the Recurrent Batch Normalization - paper). Each of these can be controlled individually, because batch norm is - expensive, and not all are necessary. The paper doesn't mention the relative - effects of these different batch norms; however, experimentation with a - shallow LSTM for the `permuted_mnist` sequence task suggests that (h) is the - most important and the other two can be left off. For other tasks or deeper - (stacked) LSTMs, other batch norm combinations may be more effective. 
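The usage snippet in the deleted module docstring above ports almost unchanged to Sonnet. A minimal sketch, assuming `snt.LSTM` keeps the call and `initial_state` interface of the class removed here (note that `initial_state` takes the batch size explicitly):

```python
import sonnet as snt
import tensorflow as tf

hidden_size = 10
batch_size = 2

# Sketch: one step of the Sonnet LSTM core, mirroring the deleted example.
rnn = snt.LSTM(hidden_size)
inputs = tf.placeholder(tf.float32, shape=[batch_size, hidden_size])
out, next_state = rnn(inputs, rnn.initial_state(batch_size))
```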
- - ##### Batch normalization: collecting stats (training vs test) - - When switching to testing (see `LSTM.with_batch_norm_control`), we can use a - mean and stddev learnt from the training data instead of using the statistics - from the test data. (This both increases test accuracy because the statistics - have less variance, and if the test data does not have the same distribution - as the training data then we must use the training statistics to ensure the - effective network does not change when switching to testing anyhow.) - - This does however introduces a slight subtlety. The first few time steps of - the RNN tend to have varying statistics (mean and variance) before settling - down to a steady value. Therefore in general, better performance is obtained - by using separate statistics for the first few time steps, and then using the - final set of statistics for all subsequent time steps. This is controlled by - the parameter `max_unique_stats`. (We can't have an unbounded number of - distinct statistics for both technical reasons and also for the case where - test sequences are longer than anything seen in training.) - - You may be fine leaving it at its default value of 1. Small values (like 10) - may achieve better performance on some tasks when testing with cached - statistics. - - Attributes: - state_size: Tuple of `tf.TensorShape`s indicating the size of state tensors. - output_size: `tf.TensorShape` indicating the size of the core output. - use_peepholes: Boolean indicating whether peephole connections are used. - use_batch_norm_h: Boolean indicating whether batch norm (h) is enabled. - use_batch_norm_x: Boolean indicating whether batch norm (x) is enabled. - use_batch_norm_c: Boolean indicating whether batch norm (c) is enabled. - """ - - # Keys that may be provided for parameter initializers. - W_GATES = "w_gates" # weight for gates - B_GATES = "b_gates" # bias of gates - W_F_DIAG = "w_f_diag" # weight for prev_cell -> forget gate peephole - W_I_DIAG = "w_i_diag" # weight for prev_cell -> input gate peephole - W_O_DIAG = "w_o_diag" # weight for prev_cell -> output gate peephole - GAMMA_H = "gamma_h" # batch norm scaling for previous_hidden -> gates - GAMMA_X = "gamma_x" # batch norm scaling for input -> gates - GAMMA_C = "gamma_c" # batch norm scaling for cell -> output - BETA_C = "beta_c" # (batch norm) bias for cell -> output - POSSIBLE_KEYS = {W_GATES, B_GATES, W_F_DIAG, W_I_DIAG, W_O_DIAG, GAMMA_H, - GAMMA_X, GAMMA_C, BETA_C} - - def __init__(self, - hidden_size, - forget_bias=1.0, - initializers=None, - use_peepholes=False, - use_batch_norm_h=False, - use_batch_norm_x=False, - use_batch_norm_c=False, - max_unique_stats=1, - name="lstm"): - """Construct LSTM. - - Args: - hidden_size: (int) Hidden size dimensionality. - forget_bias: (float) Bias for the forget activation. - initializers: Dict containing ops to initialize the weights. - This dictionary may contain any of the keys in POSSIBLE_KEYS. - The gamma and beta variables control batch normalization values for - different batch norm transformations inside the cell; see the paper for - details. - use_peepholes: Boolean that indicates whether peephole connections are - used. - use_batch_norm_h: Boolean that indicates whether to apply batch - normalization at the previous_hidden -> gates contribution. If you are - experimenting with batch norm then this may be the most effective to - turn on. - use_batch_norm_x: Boolean that indicates whether to apply batch - normalization at the input -> gates contribution. 
- use_batch_norm_c: Boolean that indicates whether to apply batch - normalization at the cell -> output contribution. - max_unique_stats: The maximum number of steps to use unique batch norm - statistics for. (See module description above for more details.) - name: name of the module. - - Raises: - KeyError: if `initializers` contains any keys not in POSSIBLE_KEYS. - ValueError: if a peephole initializer is passed in the initializer list, - but `use_peepholes` is False. - ValueError: if a batch norm initializer is passed in the initializer list, - but batch norm is disabled. - ValueError: if `max_unique_stats` is not the default value, but batch norm - is disabled. - ValueError: if `max_unique_stats` is < 1. - """ - super(LSTM, self).__init__(name=name) - - self._hidden_size = hidden_size - self._forget_bias = forget_bias - self._use_peepholes = use_peepholes - self._max_unique_stats = max_unique_stats - self._use_batch_norm_h = use_batch_norm_h - self._use_batch_norm_x = use_batch_norm_x - self._use_batch_norm_c = use_batch_norm_c - self.possible_keys = self.get_possible_initializer_keys( - use_peepholes=use_peepholes, use_batch_norm_h=use_batch_norm_h, - use_batch_norm_x=use_batch_norm_x, use_batch_norm_c=use_batch_norm_c) - self._initializers = util.check_initializers(initializers, - self.possible_keys) - if max_unique_stats < 1: - raise ValueError("max_unique_stats must be >= 1") - if max_unique_stats != 1 and not ( - use_batch_norm_h or use_batch_norm_x or use_batch_norm_c): - raise ValueError("max_unique_stats specified but batch norm disabled") - - if use_batch_norm_h: - self._batch_norm_h = LSTM.IndexedStatsBatchNorm(max_unique_stats, - "batch_norm_h") - if use_batch_norm_x: - self._batch_norm_x = LSTM.IndexedStatsBatchNorm(max_unique_stats, - "batch_norm_x") - if use_batch_norm_c: - self._batch_norm_c = LSTM.IndexedStatsBatchNorm(max_unique_stats, - "batch_norm_c") - - def with_batch_norm_control(self, is_training=True, test_local_stats=True): - """Wraps this RNNCore with the additional control input to the `BatchNorm`s. - - Example usage: - - lstm = nnd.LSTM(4) - is_training = tf.placeholder(tf.bool) - rnn_input = ... - my_rnn = rnn.rnn(lstm.with_batch_norm_control(is_training), rnn_input) - - Args: - is_training: Boolean that indicates whether we are in - training mode or testing mode. When in training mode, the batch norm - statistics are taken from the given batch, and moving statistics are - updated. When in testing mode, the moving statistics are not updated, - and in addition if `test_local_stats` is False then the moving - statistics are used for the batch statistics. See the `BatchNorm` module - for more details. - test_local_stats: Boolean scalar indicated whether to use local - batch statistics in test mode. - - Returns: - RNNCell wrapping this class with the extra input(s) added. 
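The `with_batch_norm_control` wrapper described above is what threads the `is_training` flag through to the batch-norm modules when the core is driven by a standard RNN loop. A sketch of that wiring, under the assumption that the Sonnet `snt.LSTM` preserves the `use_batch_norm_h` option and `with_batch_norm_control` method of the class deleted here; the placeholder shapes are illustrative.

```python
import sonnet as snt
import tensorflow as tf

# Sketch: thread the training flag through to the LSTM's batch-norm modules.
# Assumes the Sonnet LSTM keeps the batch-norm options and the
# with_batch_norm_control wrapper of the deleted class.
is_training = tf.placeholder(tf.bool, shape=[])
sequence = tf.placeholder(tf.float32, shape=[None, 20, 8])  # [batch, time, features]

lstm = snt.LSTM(hidden_size=16, use_batch_norm_h=True)
cell = lstm.with_batch_norm_control(is_training=is_training)

# Sonnet cores are RNNCells, so a wrapped core can be driven by dynamic_rnn.
outputs, final_state = tf.nn.dynamic_rnn(cell, sequence, dtype=tf.float32)
```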
- """ - return LSTM.CellWithExtraInput(self, - is_training=is_training, - test_local_stats=test_local_stats) - - @classmethod - def get_possible_initializer_keys( - cls, use_peepholes=False, use_batch_norm_h=False, use_batch_norm_x=False, - use_batch_norm_c=False): - possible_keys = cls.POSSIBLE_KEYS.copy() - if not use_peepholes: - possible_keys.difference_update( - {cls.W_F_DIAG, cls.W_I_DIAG, cls.W_O_DIAG}) - if not use_batch_norm_h: - possible_keys.remove(cls.GAMMA_H) - if not use_batch_norm_x: - possible_keys.remove(cls.GAMMA_X) - if not use_batch_norm_c: - possible_keys.difference_update({cls.GAMMA_C, cls.BETA_C}) - return possible_keys - - def _build(self, inputs, prev_state, is_training=True, test_local_stats=True): - """Connects the LSTM module into the graph. - - If this is not the first time the module has been connected to the graph, - the Tensors provided as inputs and state must have the same final - dimension, in order for the existing variables to be the correct size for - their corresponding multiplications. The batch size may differ for each - connection. - - Args: - inputs: Tensor of size `[batch_size, input_size]`. - prev_state: Tuple (prev_hidden, prev_cell), or if batch norm is enabled - and `max_unique_stats > 1`, then (prev_hidden, prev_cell, time_step). - Here, prev_hidden and prev_cell are tensors of size - `[batch_size, hidden_size]`, and time_step is used to indicate the - current RNN step. - is_training: Boolean indicating whether we are in training mode (as - opposed to testing mode), passed to the batch norm - modules. Note to use this you must wrap the cell via the - `with_batch_norm_control` function. - test_local_stats: Boolean indicating whether to use local batch statistics - in test mode. See the `BatchNorm` documentation for more on this. - - Returns: - A tuple (output, next_state) where 'output' is a Tensor of size - `[batch_size, hidden_size]` and 'next_state' is a tuple - (next_hidden, next_cell) or (next_hidden, next_cell, time_step + 1), - where next_hidden and next_cell have size `[batch_size, hidden_size]`. - - Raises: - ValueError: If connecting the module into the graph any time after the - first time, and the inferred size of the inputs does not match previous - invocations. - """ - if self._max_unique_stats == 1: - prev_hidden, prev_cell = prev_state - time_step = None - else: - prev_hidden, prev_cell, time_step = prev_state - - self._create_gate_variables(inputs.get_shape(), inputs.dtype) - self._create_batch_norm_variables(inputs.dtype) - - # pylint false positive: calling module of same file; see b/29989864 - # pylint: disable=not-callable - - if self._use_batch_norm_h or self._use_batch_norm_x: - gates_h = tf.matmul(prev_hidden, self._w_h) - gates_x = tf.matmul(inputs, self._w_x) - if self._use_batch_norm_h: - gates_h = self._gamma_h * self._batch_norm_h(gates_h, - time_step, - is_training, - test_local_stats) - if self._use_batch_norm_x: - gates_x = self._gamma_x * self._batch_norm_x(gates_x, - time_step, - is_training, - test_local_stats) - gates = gates_h + gates_x + self._b - else: - # Parameters of gates are concatenated into one multiply for efficiency. 
- inputs_and_hidden = tf.concat([inputs, prev_hidden], 1) - gates = tf.matmul(inputs_and_hidden, self._w_xh) + self._b - - # i = input_gate, j = new_input, f = forget_gate, o = output_gate - i, j, f, o = array_ops.split(gates, 4, 1) - - if self._use_peepholes: # diagonal connections - self._create_peephole_variables(inputs.dtype) - f += self._w_f_diag * prev_cell - i += self._w_i_diag * prev_cell - - forget_mask = tf.sigmoid(f + self._forget_bias) - new_cell = forget_mask * prev_cell + tf.sigmoid(i) * tf.tanh(j) - cell_output = new_cell - if self._use_batch_norm_c: - cell_output = (self._beta_c - + self._gamma_c * self._batch_norm_c(cell_output, - time_step, - is_training, - test_local_stats)) - if self._use_peepholes: - cell_output += self._w_o_diag * cell_output - new_hidden = tf.tanh(cell_output) * tf.sigmoid(o) - - if self._max_unique_stats == 1: - return new_hidden, (new_hidden, new_cell) - else: - return new_hidden, (new_hidden, new_cell, time_step + 1) - - def _create_batch_norm_variables(self, dtype): - """Initialize the variables used for the `BatchNorm`s (if any).""" - # The paper recommends a value of 0.1 for good gradient flow through the - # tanh nonlinearity (although doesn't say whether this is for all gammas, - # or just some). - gamma_initializer = tf.constant_initializer(0.1) - - if self._use_batch_norm_h: - self._gamma_h = tf.get_variable( - LSTM.GAMMA_H, - shape=[4 * self._hidden_size], - dtype=dtype, - initializer=(self._initializers.get(LSTM.GAMMA_H, gamma_initializer))) - if self._use_batch_norm_x: - self._gamma_x = tf.get_variable( - LSTM.GAMMA_X, - shape=[4 * self._hidden_size], - dtype=dtype, - initializer=(self._initializers.get(LSTM.GAMMA_X, gamma_initializer))) - if self._use_batch_norm_c: - self._gamma_c = tf.get_variable( - LSTM.GAMMA_C, - shape=[self._hidden_size], - dtype=dtype, - initializer=(self._initializers.get(LSTM.GAMMA_C, gamma_initializer))) - self._beta_c = tf.get_variable( - LSTM.BETA_C, - shape=[self._hidden_size], - dtype=dtype, - initializer=self._initializers.get(LSTM.BETA_C)) - - def _create_gate_variables(self, input_shape, dtype): - """Initialize the variables used for the gates.""" - if len(input_shape) != 2: - raise ValueError( - "Rank of shape must be {} not: {}".format(2, len(input_shape))) - input_size = input_shape.dims[1].value - - b_shape = [4 * self._hidden_size] - - equiv_input_size = self._hidden_size + input_size - initializer = basic.create_linear_initializer(equiv_input_size) - - if self._use_batch_norm_h or self._use_batch_norm_x: - self._w_h = tf.get_variable( - LSTM.W_GATES + "_H", - shape=[self._hidden_size, 4 * self._hidden_size], - dtype=dtype, - initializer=self._initializers.get(LSTM.W_GATES, initializer)) - self._w_x = tf.get_variable( - LSTM.W_GATES + "_X", - shape=[input_size, 4 * self._hidden_size], - dtype=dtype, - initializer=self._initializers.get(LSTM.W_GATES, initializer)) - else: - self._w_xh = tf.get_variable( - LSTM.W_GATES, - shape=[self._hidden_size + input_size, 4 * self._hidden_size], - dtype=dtype, - initializer=self._initializers.get(LSTM.W_GATES, initializer)) - self._b = tf.get_variable( - LSTM.B_GATES, - shape=b_shape, - dtype=dtype, - initializer=self._initializers.get(LSTM.B_GATES, initializer)) - - def _create_peephole_variables(self, dtype): - """Initialize the variables used for the peephole connections.""" - self._w_f_diag = tf.get_variable( - LSTM.W_F_DIAG, - shape=[self._hidden_size], - dtype=dtype, - initializer=self._initializers.get(LSTM.W_F_DIAG)) - self._w_i_diag = tf.get_variable( - 
LSTM.W_I_DIAG, - shape=[self._hidden_size], - dtype=dtype, - initializer=self._initializers.get(LSTM.W_I_DIAG)) - self._w_o_diag = tf.get_variable( - LSTM.W_O_DIAG, - shape=[self._hidden_size], - dtype=dtype, - initializer=self._initializers.get(LSTM.W_O_DIAG)) - - def initial_state(self, batch_size, dtype=tf.float32, trainable=False, - trainable_initializers=None): - """Builds the default start state tensor of zeros. - - Args: - batch_size: An int, float or scalar Tensor representing the batch size. - dtype: The data type to use for the state. - trainable: Boolean that indicates whether to learn the initial state. - trainable_initializers: An optional pair of initializers for the - initial hidden state and cell state. - - Returns: - A tensor tuple `([batch_size x state_size], [batch_size x state_size], ?)` - filled with zeros, with the third entry present when batch norm is enabled - with `max_unique_stats > 1', with value `0` (representing the time step). - """ - if self._max_unique_stats == 1: - return super(LSTM, self).initial_state( - batch_size, dtype, trainable, trainable_initializers) - else: - if not trainable: - state = super(rnn_core.RNNCore, self).zero_state(batch_size, dtype) - else: - # We have to manually create the state ourselves so we don't create a - # variable that never gets used for the third entry. - state = rnn_core.trainable_initial_state( - batch_size, - (tf.TensorShape([self._hidden_size]), - tf.TensorShape([self._hidden_size])), - dtype, - trainable_initializers) - return (state[0], state[1], tf.constant(0, dtype=tf.int32)) - - @property - def state_size(self): - """Tuple of `tf.TensorShape`s indicating the size of state tensors.""" - if self._max_unique_stats == 1: - return (tf.TensorShape([self._hidden_size]), - tf.TensorShape([self._hidden_size])) - else: - return (tf.TensorShape([self._hidden_size]), - tf.TensorShape([self._hidden_size]), - tf.TensorShape(1)) - - @property - def output_size(self): - """`tf.TensorShape` indicating the size of the core output.""" - return tf.TensorShape([self._hidden_size]) - - @property - def use_peepholes(self): - """Boolean indicating whether peephole connections are used.""" - return self._use_peepholes - - @property - def use_batch_norm_h(self): - """Boolean indicating whether batch norm for hidden -> gates is enabled.""" - return self._use_batch_norm_h - - @property - def use_batch_norm_x(self): - """Boolean indicating whether batch norm for input -> gates is enabled.""" - return self._use_batch_norm_x - - @property - def use_batch_norm_c(self): - """Boolean indicating whether batch norm for cell -> output is enabled.""" - return self._use_batch_norm_c - - class IndexedStatsBatchNorm(base.AbstractModule): - """BatchNorm module where batch statistics are selected by an input index. - - This is used by LSTM+batchnorm, where we have distinct batch norm statistics - for the first `max_unique_stats` time steps, and then use the final set of - statistics for subsequent time steps. - - The module has as input (x, index, is_training, test_local_stats). During - training or when test_local_stats=True, the output is simply batchnorm(x) - (where mean(x) and stddev(x) are used), and during training the - `BatchNorm` module accumulates statistics in mean_i, etc, where - i = min(index, max_unique_stats - 1). - - During testing with test_local_stats=False, the output is batchnorm(x), - where mean_i and stddev_i are used instead of mean(x) and stddev(x). - - See the `BatchNorm` module for more on is_training and test_local_stats. 
- - No offset `beta` or scaling `gamma` are learnt. - """ - - def __init__(self, max_unique_stats, name=None): - """Create an IndexedStatsBatchNorm. - - Args: - max_unique_stats: number of different indices to have statistics for; - indices beyond this will use the final statistics. - name: Name of the module. - """ - super(LSTM.IndexedStatsBatchNorm, self).__init__(name=name) - self._max_unique_stats = max_unique_stats - - def _build(self, inputs, index, is_training, test_local_stats): - """Add the IndexedStatsBatchNorm module to the graph. - - Args: - inputs: Tensor to apply batch norm to. - index: Scalar TensorFlow int32 value to select the batch norm index. - is_training: Boolean to indicate to `nn.BatchNorm` if we are - currently training. - test_local_stats: Boolean to indicate to `nn.BatchNorm` if batch - normalization should use local batch statistics at test time. - - Returns: - Output of batch norm operation. - """ - def create_batch_norm(): - return batch_norm.BatchNorm(offset=False, scale=False)( - inputs, is_training, test_local_stats) - - if self._max_unique_stats > 1: - pred_fn_pairs = [(tf.equal(i, index), create_batch_norm) - for i in xrange(self._max_unique_stats - 1)] - out = tf.case(pred_fn_pairs, create_batch_norm) - out.set_shape(inputs.get_shape()) # needed for tf.case shape inference - return out - else: - return create_batch_norm() - - class CellWithExtraInput(rnn.RNNCell): - """Wraps an RNNCell to create a new RNNCell with extra input appended. - - This will pass the additional input `args` and `kwargs` to the __call__ - function of the RNNCell after the input and prev_state inputs. - """ - - def __init__(self, cell, *args, **kwargs): - """Construct the CellWithExtraInput. - - Args: - cell: The RNNCell to wrap (typically a nn.RNNCore). - *args: Extra arguments to pass to __call__. - **kwargs: Extra keyword arguments to pass to __call__. - """ - self._cell = cell - self._args = args - self._kwargs = kwargs - - def __call__(self, inputs, state): - return self._cell(inputs, state, *self._args, **self._kwargs) - - @property - def state_size(self): - """Tuple indicating the size of nested state tensors.""" - return self._cell.state_size - - @property - def output_size(self): - """`tf.TensorShape` indicating the size of the core output.""" - return self._cell.output_size diff --git a/nn/mlp.py b/nn/mlp.py deleted file mode 100644 index bfbb3c2..0000000 --- a/nn/mlp.py +++ /dev/null @@ -1,177 +0,0 @@ -# Copyright 2016 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""A minimal interface mlp module.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import collections - -from six.moves import xrange -import tensorflow as tf - -from nn import base -from nn import basic -from nn import util - - -class MLP(base.AbstractModule, base.Transposable): - """A Multi-Layer perceptron module.""" - - def __init__(self, - output_sizes, - activation=tf.nn.relu, - activate_final=False, - initializers=None, - use_bias=True, - name="mlp"): - """Constructs an MLP module. - - Args: - output_sizes: An iterable of output dimensionalities as defined in - `basic.Linear`. Output size can be defined either as number or via a - callable. In the latter case, since the function invocation is deferred - to graph construction time, the user must only ensure that entries can - be called when build is called. Each entry in the iterable defines - properties in the corresponding linear layer. - activation: An activation op. The activation is applied to intermediate - layers, and optionally to the output of the final layer. - activate_final: Boolean determining if the activation is applied to - the output of the final layer. Default `False`. - initializers: Optional dict containing ops to initialize the linear - layers' weights (with key 'w') or biases (with key 'b'). - use_bias: Whether to include bias parameters in the linear layers. - Default `True`. - name: Name of the module. - - Raises: - Error: If initializers contains any keys other than 'w' or 'b'. - ValueError: If output_sizes is empty. - TypeError: If `activation` is not callable; or if `output_sizes` is not - iterable. - """ - super(MLP, self).__init__(name=name) - - if not isinstance(output_sizes, collections.Iterable): - raise TypeError("output_sizes must be iterable") - output_sizes = tuple(output_sizes) - if not output_sizes: - raise ValueError("output_sizes must not be empty") - self._output_sizes = output_sizes - self._num_layers = len(self._output_sizes) - self._input_shape = None - - self.possible_keys = self.get_possible_initializer_keys(use_bias=use_bias) - self._initializers = util.check_initializers(initializers, - self.possible_keys) - if not callable(activation): - raise TypeError("Input 'activation' must be callable") - self._activation = activation - self._activate_final = activate_final - - self._use_bias = use_bias - self._instantiate_layers() - - def _instantiate_layers(self): - """Instantiates all the linear modules used in the network. - - Layers are instantiated in the constructor, as opposed to the build - function, because MLP implements the Transposable interface, and the - transpose function can be called before the module is actually connected - to the graph and build is called. - - Notice that this is safe since layers in the transposed module are - instantiated using a lambda returning input_size of the mlp layers, and - this doesn't have to return sensible values until the original module is - connected to the graph. 
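For readers tracking the migration, the Sonnet replacement is built and connected the same way as the module removed here, including the deferred transpose; a minimal sketch, assuming the Sonnet 1.x `snt.nets.MLP` API and example layer sizes:

import sonnet as snt
import tensorflow as tf

images = tf.placeholder(tf.float32, [None, 784])
encoder = snt.nets.MLP([128, 32], activation=tf.nn.relu)
code = encoder(images)         # linear layers are created on first connection
decoder = encoder.transpose()  # output sizes mirror the encoder's input sizes
reconstruction = decoder(code)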
- """ - - with tf.variable_scope(self._template.variable_scope): - self._layers = [basic.Linear(self._output_sizes[i], - name="linear_{}".format(i), - initializers=self._initializers, - use_bias=self.use_bias) - for i in xrange(self._num_layers)] - - @classmethod - def get_possible_initializer_keys(cls, use_bias=True): - return basic.Linear.get_possible_initializer_keys(use_bias=use_bias) - - def _build(self, inputs): - """Assembles the `MLP` and connects it to the graph. - - Args: - inputs: A 2D Tensor of size `[batch_size, input_size]`. - - Returns: - A 2D Tensor of size `[batch_size, output_sizes[-1]]`. - """ - self._input_shape = tuple(inputs.get_shape().as_list()) - net = inputs - - final_index = self._num_layers - 1 - for layer_id in xrange(self._num_layers): - net = self._layers[layer_id](net) - - if final_index != layer_id or self._activate_final: - net = self._activation(net) - - return net - - @property - def layers(self): - """Returns a tuple containing the linear layers of the `MLP`.""" - return self._layers - - @property - def output_sizes(self): - return tuple([l() if callable(l) else l for l in self._output_sizes]) - - @property - def use_bias(self): - return self._use_bias - - @property - def activate_final(self): - return self._activate_final - - # Implements Transposable interface - @property - def input_shape(self): - """Returns shape of input `Tensor` passed at last call to `build`.""" - self._ensure_is_connected() - return self._input_shape - - # Implements Transposable interface - def transpose(self, name=None): - """Returns transposed `MLP`. - - Args: - name: Optional string specifiying the name of the transposed module. The - default name is constructed by appending "_transpose" to `self.name`. - - Returns: - Matching transposed `MLP` module. - """ - if name is None: - name = self.name + "_transpose" - output_sizes = [lambda l=layer: l.input_shape[1] for layer in self._layers] - output_sizes.reverse() - return MLP(name=name, - output_sizes=output_sizes, - activation=self._activation, - activate_final=self._activate_final, - initializers=self._initializers, - use_bias=self._use_bias) diff --git a/nn/rnn_core.py b/nn/rnn_core.py deleted file mode 100644 index bc2c6e2..0000000 --- a/nn/rnn_core.py +++ /dev/null @@ -1,270 +0,0 @@ -# Copyright 2016 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Base class for TensorFlow nn recurrent cores. - -This file contains the Abstract Base Class for defining Recurrent Cores in -TensorFlow. A Recurrent Core is an object which holds the properties of other -`nn.Module`s and also satisfies the interface of any RNNCell in tensorflow. 
-""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import abc -import warnings - - -import six -from six.moves import xrange -import tensorflow as tf - -from tensorflow.python.framework import tensor_shape -from tensorflow.contrib import rnn -from tensorflow.python.util import nest - -from nn import base - - -def _single_learnable_state(state, state_id=0, learnable=True): - """Returns an initial (maybe learnable) state. - - This function does not create any variable scopes, and it should be called - from a nn module. This function also makes sure that all the rows of its - `state` argument have the same value. - - Args: - state: initial value of the initial state. It should be a tensor of at least - two dimensions, of which the first dimension corresponds to the - batch_size dimension. All rows of such tensor should have the same value. - state_id: integer that uniquely identifies this state. - learnable: boolean that indicates whether the state is learnable. - - Returns: - The initial learnable state `Tensor`. - """ - unpacked_state = tf.unpack(state) - # Assert that all rows have the same values. - assert_rows_equal = [tf.assert_equal(s, unpacked_state[0]) - for s in unpacked_state] - - # We wish to have all the graph assertions in the graph's critical path, - # so we include them even if the initial state is left unmodified (i.e. when - # the state is not learnable). - # Note: All these assertions will be run every time that data flows - # through the graph. At that point, the control_dependencies context manager - # makes sure that such assertions are run, and will raise an exception if any - # fails. - with tf.control_dependencies(assert_rows_equal): - if not learnable: - return state - else: - state_shape = state.get_shape() - state_shape.assert_is_fully_defined() - state_shape_list = state_shape.as_list() - batch_size, trailing_shape = state_shape_list[0], state_shape_list[1:] - - initial_value = tf.reshape(unpacked_state[0], [1] + trailing_shape) - initial_state_variable = tf.get_variable( - "initial_state_%d" % state_id, dtype=initial_value.dtype, - initializer=initial_value) - - trailing_size_repeat = [1] * len(trailing_shape) - return tf.tile(initial_state_variable, - tf.constant([batch_size] + trailing_size_repeat)) - - -def trainable_initial_state(batch_size, state_size, dtype, initializers=None): - """Creates an initial state consisting of trainable variables. - - The trainable variables are created with the same shapes as the elements of - `state_size` and are tiled to produce an initial state. - - Args: - batch_size: An int, or scalar int32 Tensor representing the batch size. - state_size: A `TensorShape` or nested tuple of `TensorShape`s to use for the - shape of the trainable variables. - dtype: The data type used to create the variables and thus initial state. - initializers: An optional container of the same structure as `state_size` - containing initializers for the variables. - - Returns: - A `Tensor` or nested tuple of `Tensor`s with the same size and structure - as `state_size`, where each `Tensor` is a tiled trainable `Variable`. - - Raises: - ValueError: if the user passes initializers that are not functions. 
- """ - flat_state_size = nest.flatten(state_size) - - if not initializers: - flat_initializer = tuple(tf.zeros_initializer for _ in flat_state_size) - else: - nest.assert_same_structure(initializers, state_size) - flat_initializer = nest.flatten(initializers) - if not all([callable(init) for init in flat_initializer]): - raise ValueError("Not all the passed initializers are callable objects.") - - # Produce names for the variables. In the case of a tuple or nested tuple, - # this is just a sequence of numbers, but for a flat `namedtuple`, we use - # the field names. NOTE: this could be extended to nested `namedtuple`s, - # but for now that's extra complexity that's not used anywhere. - try: - names = ["init_{}".format(state_size._fields[i]) - for i in xrange(len(flat_state_size))] - except (AttributeError, IndexError): - names = ["init_state_{}".format(i) for i in xrange(len(flat_state_size))] - - flat_initial_state = [] - - for name, size, init in zip(names, flat_state_size, flat_initializer): - shape_with_batch_dim = [1] + tensor_shape.as_shape(size).as_list() - initial_state_variable = tf.get_variable( - name, shape=shape_with_batch_dim, dtype=dtype, initializer=init) - - initial_state_variable_dims = initial_state_variable.get_shape().ndims - tile_dims = [batch_size] + [1] * (initial_state_variable_dims - 1) - flat_initial_state.append( - tf.tile(initial_state_variable, tile_dims, name=(name + "_tiled"))) - - return nest.pack_sequence_as(structure=state_size, - flat_sequence=flat_initial_state) - - -@six.add_metaclass(abc.ABCMeta) -class RNNCore(base.AbstractModule, rnn.RNNCell): - """Superclass for Recurrent Neural Network Cores. - - This class defines the basic functionality that every core should implement, - mainly the `initial_state` method which will return an example of their - initial state. - It also inherits from the two interfaces it should be compatible with, which - are `nn.Module` and `rnn_cell.RNNCell`. - - As with any other `nn.Module` any subclass must implement a `_build` method - that constructs the graph that corresponds to a core. Such a build method - should always have the same interface, which is the following: - - output, new_state = self._build(input, prev_state) - - where output, new_state, input, and prev_state are arbitrarily nested - tensors. Such structures can be defined according to the following - grammar: - - element = tuple(element*) | list(element*) | tf.Tensor - - This class is to be used with tensorflow containers such as `rnn` in - tensorflow.python.ops.rnn. These containers only accept `rnn_cell.RNNCell` - objects, hence the need to comply with its interface. This way, all the - RNNCores should expose a `state_size` and `output_size` properties. - """ - __metaclass__ = abc.ABCMeta - - def initial_state(self, batch_size, dtype=tf.float32, trainable=False, - trainable_initializers=None): - """Builds the default start state for an RNNCore. - - Args: - batch_size: An int, or scalar int32 Tensor representing the batch size. - dtype: The data type to use for the state. - trainable: Boolean that indicates whether to learn the initial state. - trainable_initializers: An initializer function or nested structure of - functions with same structure as the `state_size` property of the - core, to be used as initializers of the initial state variable. - - Returns: - A tensor or nested tuple of tensors with same structure and shape as the - `state_size` property of the core. - - Raises: - ValueError: if the user passes initializers that are not functions. 
- """ - if not trainable: - return super(RNNCore, self).zero_state(batch_size, dtype) - else: - return trainable_initial_state( - batch_size, self.state_size, dtype, trainable_initializers) - - -class TrainableInitialState(base.AbstractModule): - """Helper Module that creates a learnable initial state for an RNNCore. - - This class receives an example (possibly nested) initial state of an RNNCore, - and returns a state that has the same shape, structure, and values, but is - trainable. Additionally, the user may specify a boolean mask that - indicates which parts of the initial state should be trainable. - - This allows users to train an unrolled RNNCore with a learnable initial state - in the following way: - - core = ... # Any RNNCore module object. - initial_state = core.initial_state(batch_size, dtype) - trainable_initial_state = nn.TrainableInitialState(initial_state)() - output, final_state = tf.nn.dynamic_rnn( - core, input_sequence, initial_state=trainable_initial_state) - """ - - def __init__(self, initial_state, mask=None, name="trainable_initial_state"): - """Constructs the Module that introduces a trainable state in the graph. - - It receives an initial state that will be used as the intial values for the - trainable variables that the module contains, and optionally a mask that - indicates the parts of the initial state that should be learnable. - - Args: - initial_state: tensor or arbitrarily nested iterables of tensors. - mask: optional boolean mask. It should have the same nested structure as - the given initial_state. - name: module name. - - Raises: - TypeError: if mask is not a list of booleans or None. - """ - super(TrainableInitialState, self).__init__(name=name) - - # Since python 2.7, DeprecationWarning is ignored by default. - # Turn on the warning: - warnings.simplefilter("always", DeprecationWarning) - warnings.warn("Use the trainable flag in initial_state instead.", - DeprecationWarning, stacklevel=2) - - if mask is not None: - flat_mask = nest.flatten(mask) - if not all([isinstance(m, bool) for m in flat_mask]): - raise TypeError("Mask should be None or a list of boolean values.") - nest.assert_same_structure(initial_state, mask) - - self._mask = mask - self._initial_state = initial_state - - def _build(self): - """Connects the module to the graph. - - Returns: - The learnable state, which has the same type, structure and shape as - the `initial_state` passed to the constructor. - """ - flat_initial_state = nest.flatten(self._initial_state) - if self._mask is not None: - flat_mask = nest.flatten(self._mask) - flat_learnable_state = [ - _single_learnable_state(state, state_id=i, learnable=mask) - for i, (state, mask) in enumerate(zip(flat_initial_state, flat_mask))] - else: - flat_learnable_state = [_single_learnable_state(state, state_id=i) - for i, state in enumerate(flat_initial_state)] - - return nest.pack_sequence_as(structure=self._initial_state, - flat_sequence=flat_learnable_state) - diff --git a/nn/sequential.py b/nn/sequential.py deleted file mode 100644 index b30cd3a..0000000 --- a/nn/sequential.py +++ /dev/null @@ -1,84 +0,0 @@ -# Copyright 2016 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Sequential Module for TensorFlow nn. - -A Module that wraps a list of other modules and ops, connecting the output of -each to the input of the next. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from nn import base - - -class Sequential(base.AbstractModule): - """Builds a module out of a sequence of callables.""" - - def __init__(self, layers, name="sequential"): - """Constructs a Sequential module. - - This feeds the output of each layer into the next and returns the output - of the final layer. - - If a layer returns a tuple, it is assumed that this must be unpacked into - the argument list of the next layer. If it is not a tuple, it is simply - passed through to the next layer unchanged. - - Args: - layers: Iterable of callables to stack together, which can be modules - or ops. - name: Name of the module. - - Raises: - TypeError: If `layers` is None or contains any non-callable items. - """ - super(Sequential, self).__init__(name=name) - - # Store a copy of the iterable in a tuple to ensure users cannot modify the - # iterable later, and protect against iterables which can only be read once. - self._layers = tuple(layers) - - is_not_callable = [(i, mod) for i, mod in enumerate(self._layers) - if not callable(mod)] - - if is_not_callable: - raise TypeError("Items {} not callable with types: {}".format( - ", ".join(str(i) for i, _ in is_not_callable), - ", ".join(type(layer).__name__ for _, layer in is_not_callable))) - - def _build(self, *args): - """Connects the Sequential module into the graph. - - Args: - *args: A tuple of inputs, to be unpacked as the arguments to the first - layer. - - Returns: - The output value of the last layer. - """ - net = args - - for layer in self._layers: - if isinstance(net, tuple): - net = layer(*net) - else: - net = layer(net) - - return net - - @property - def layers(self): - return self._layers diff --git a/nn/util.py b/nn/util.py deleted file mode 100644 index ae59e65..0000000 --- a/nn/util.py +++ /dev/null @@ -1,169 +0,0 @@ -# Copyright 2016 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
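The call-chaining behaviour of the `Sequential` module removed above is what `problems.py` now gets from Sonnet; a minimal sketch, assuming the Sonnet 1.x API and example layer sizes:

import sonnet as snt
import tensorflow as tf

images = tf.placeholder(tf.float32, [None, 28, 28, 1])
network = snt.Sequential([snt.BatchFlatten(),
                          snt.nets.MLP([256, 10], activation=tf.nn.relu)])
logits = network(images)  # each callable's output feeds the next one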
-# ============================================================================== - -"""Utility functions for dealing with nn Modules.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import re - -import tensorflow as tf - - -def get_variables_in_scope(scope, collection=tf.GraphKeys.TRAINABLE_VARIABLES): - """Returns a tuple `tf.Variable`s in a scope for a given collection. - - Args: - scope: `tf.VariableScope` instance to retrieve variables from. - collection: Collection to restrict query to. By default this is - `tf.Graphkeys.TRAINABLE_VARIABLES`, which doesn't include non-trainable - variables such as moving averages. - - Returns: - A tuple of `tf.Variable` objects. - """ - # Escape the name in case it contains any "." characters. Add a closing slash - # so we will not search any scopes that have this scope name as a prefix. - scope_name = re.escape(scope.name) + "/" - - return tuple(tf.get_collection(collection, scope_name)) - - -def get_variables_in_module(module, - collection=tf.GraphKeys.TRAINABLE_VARIABLES): - """Returns tuple of `tf.Variable`s declared inside an `nn.Module`. - - Note that this operates by searching the variable scope a module contains, - and so does not know about any modules which were constructed elsewhere but - used inside this module. - - Args: - module: `nn.Module` instance to query the scope of. - collection: Collection to restrict query to. By default this is - `tf.Graphkeys.TRAINABLE_VARIABLES`, which doesn't include non-trainable - variables such as moving averages. - - Returns: - A tuple of `tf.Variable` objects. - - Raises: - NotConnectedError: If the module is not connected to the Graph. - """ - return get_variables_in_scope(module.variable_scope, collection=collection) - - -def check_initializers(initializers, keys): - """Checks the given initializers. - - This checks that `initializers` is a dictionary that only contains keys in - `keys`, and furthermore the entries in `initializers` are functions or - further dictionaries (the latter used, for example, in passing initializers - to modules inside modules) which must satisfy the same constraints. - - Args: - initializers: Dictionary of initializers (allowing nested dictionaries) or - None. - keys: Iterable of valid keys for `initializers`. - - Returns: - Copy of checked dictionary of initializers. - - Raises: - KeyError: If an initializer is provided for a key not in `keys`. - TypeError: If a provided initializer is not a callable function, or if the - dict of initializers is not in fact a dict. - """ - if initializers is None: - return {} - - keys = set(keys) - - # If the user is creating modules that nests other modules, then it is - # possible that they might not nest the initializer dictionaries correctly. If - # that is the case, then we might find that initializers is not a dict here. - # We raise a helpful exception in this case. - if not issubclass(type(initializers), dict): - raise TypeError("A dict of initializers was expected, but not " - "given. 
You should double-check that you've nested the " - "initializers for any sub-modules correctly.") - - if not set(initializers) <= keys: - extra_keys = set(initializers) - keys - raise KeyError( - "Invalid initializer keys {}, initializers can only " - "be provided for {}".format( - ", ".join("'{}'".format(key) for key in extra_keys), - ", ".join("'{}'".format(key) for key in keys))) - - def check_nested_callables(dictionary): - for key, entry in dictionary.items(): - if isinstance(entry, dict): - check_nested_callables(entry) - elif not callable(entry): - raise TypeError( - "Initializer for '{}' is not a callable function or dictionary" - .format(key)) - - check_nested_callables(initializers) - - return dict(initializers) - - -def check_partitioners(partitioners, keys): - """Checks the given partitioners. - - This checks that `partitioners` is a dictionary that only contains keys in - `keys`, and furthermore the entries in `partitioners` are functions or - further dictionaries (the latter used, for example, in passing partitioners - to modules inside modules) which must satisfy the same constraints. - - Args: - partitioners: Dictionary of partitioners (allowing nested dictionaries) or - None. - keys: Iterable of valid keys for `partitioners`. - - Returns: - Checked dictionary of partitioners. - - Raises: - KeyError: If an partitioner is provided for a key not in `keys`. - TypeError: If a provided partitioner is not a callable function. - """ - if partitioners is None: - return {} - - keys = set(keys) - - if not set(partitioners) <= keys: - extra_keys = set(partitioners) - keys - raise KeyError( - "Invalid partitioner keys {}, partitioners can only " - "be provided for {}".format( - ", ".join("'{}'".format(key) for key in extra_keys), - ", ".join("'{}'".format(key) for key in keys))) - - def check_nested_callables(dictionary): - for key, entry in dictionary.items(): - if isinstance(entry, dict): - check_nested_callables(entry) - elif not callable(entry): - raise TypeError( - "Partitioner for '{}' is not a callable function or dictionary" - .format(key)) - - check_nested_callables(partitioners) - - return partitioners diff --git a/problems.py b/problems.py index f9bca57..553556f 100644 --- a/problems.py +++ b/problems.py @@ -24,12 +24,11 @@ from six.moves import urllib from six.moves import xrange # pylint: disable=redefined-builtin +import sonnet as snt import tensorflow as tf from tensorflow.contrib.learn.python.learn.datasets import mnist as mnist_dataset -import nn - _nn_initializers = { "w": tf.random_normal_initializer(mean=0, stddev=0.01), @@ -160,10 +159,10 @@ def mnist(layers, # pylint: disable=invalid-name labels = tf.constant(data.labels, dtype=tf.int64, name="MNIST_labels") # Network. 
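The hunk that follows swaps the bundled `nn` network for its Sonnet counterpart; a standalone sketch of the resulting MNIST network construction (Sonnet 1.x API, example layer sizes and activation):

import sonnet as snt
import tensorflow as tf

images = tf.placeholder(tf.float32, [None, 28, 28, 1])
mlp = snt.nets.MLP([20, 10], activation=tf.nn.sigmoid)
network = snt.Sequential([snt.BatchFlatten(), mlp])
logits = network(images)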
- mlp = nn.MLP(list(layers) + [10], - activation=activation_op, - initializers=_nn_initializers) - network = nn.Sequential([nn.BatchFlatten(), mlp]) + mlp = snt.nets.MLP(list(layers) + [10], + activation=activation_op, + initializers=_nn_initializers) + network = snt.Sequential([snt.BatchFlatten(), mlp]) def build(): indices = tf.random_uniform([batch_size], 0, data.num_examples, tf.int64) @@ -249,24 +248,24 @@ def _conv_activation(x): # pylint: disable=invalid-name strides=[1, 2, 2, 1], padding="SAME") - conv = nn.ConvNet2D(output_channels=conv_channels, - kernel_shapes=[5], - strides=[1], - paddings=[nn.SAME], - activation=_conv_activation, - activate_final=True, - initializers=_nn_initializers, - use_batch_norm=batch_norm) + conv = snt.nets.ConvNet2D(output_channels=conv_channels, + kernel_shapes=[5], + strides=[1], + paddings=[snt.SAME], + activation=_conv_activation, + activate_final=True, + initializers=_nn_initializers, + use_batch_norm=batch_norm) if batch_norm: - linear_activation = lambda x: tf.nn.relu(nn.BatchNorm()(x)) + linear_activation = lambda x: tf.nn.relu(snt.BatchNorm()(x)) else: linear_activation = tf.nn.relu - mlp = nn.MLP(list(linear_layers) + [10], - activation=linear_activation, - initializers=_nn_initializers) - network = nn.Sequential([conv, nn.BatchFlatten(), mlp]) + mlp = snt.nets.MLP(list(linear_layers) + [10], + activation=linear_activation, + initializers=_nn_initializers) + network = snt.Sequential([conv, snt.BatchFlatten(), mlp]) def build(): image_batch, label_batch = queue.dequeue_many(batch_size)
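The convolutional MNIST problem above is converted the same way; a standalone sketch of the Sonnet-based stack (Sonnet 1.x API assumed, example channel and layer sizes, batch norm omitted):

import sonnet as snt
import tensorflow as tf

images = tf.placeholder(tf.float32, [None, 28, 28, 1])
conv = snt.nets.ConvNet2D(output_channels=[16, 16],
                          kernel_shapes=[5],
                          strides=[1],
                          paddings=[snt.SAME],
                          activation=tf.nn.relu,
                          activate_final=True)
mlp = snt.nets.MLP([32, 10], activation=tf.nn.relu)
network = snt.Sequential([conv, snt.BatchFlatten(), mlp])
logits = network(images)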