From 20921137141b154454c0a2698709d9f9a0302101 Mon Sep 17 00:00:00 2001 From: jachiam Date: Thu, 30 Jan 2020 08:09:39 -0800 Subject: [PATCH] PyTorch update going live. --- docs/algorithms/ddpg.rst | 31 +- docs/algorithms/ppo.rst | 33 +- docs/algorithms/sac.rst | 139 +- docs/algorithms/td3.rst | 42 +- docs/algorithms/trpo.rst | 6 +- docs/algorithms/vpg.rst | 31 +- docs/images/ex2-2_ddpg_bug_pytorch.png | Bin 0 -> 134148 bytes .../plots/ddpg/ddpg_ant_performance.svg | 2610 +++++ .../ddpg/ddpg_halfcheetah_performance.svg | 2561 +++++ .../plots/ddpg/ddpg_hopper_performance.svg | 2697 +++++ .../plots/ddpg/ddpg_swimmer_performance.svg | 2568 +++++ .../plots/ddpg/ddpg_walker2d_performance.svg | 2701 +++++ docs/images/plots/ppo/ppo_ant_performance.svg | 4802 ++++++++ .../plots/ppo/ppo_halfcheetah_performance.svg | 4742 ++++++++ .../plots/ppo/ppo_hopper_performance.svg | 4941 +++++++++ .../plots/ppo/ppo_swimmer_performance.svg | 4246 +++++++ .../plots/ppo/ppo_walker2d_performance.svg | 4898 +++++++++ .../plots/pyt/pytorch_ant_performance.svg | 7104 ++++++++++++ .../pyt/pytorch_halfcheetah_performance.svg | 7146 ++++++++++++ .../plots/pyt/pytorch_hopper_performance.svg | 7574 +++++++++++++ .../plots/pyt/pytorch_swimmer_performance.svg | 7016 ++++++++++++ .../pyt/pytorch_walker2d_performance.svg | 7282 ++++++++++++ docs/images/plots/sac/sac_ant_performance.svg | 2497 +++++ .../plots/sac/sac_halfcheetah_performance.svg | 2475 +++++ .../plots/sac/sac_hopper_performance.svg | 2601 +++++ .../plots/sac/sac_swimmer_performance.svg | 2595 +++++ .../plots/sac/sac_walker2d_performance.svg | 2526 +++++ docs/images/plots/td3/td3_ant_performance.svg | 2593 +++++ .../plots/td3/td3_halfcheetah_performance.svg | 2557 +++++ .../plots/td3/td3_hopper_performance.svg | 2672 +++++ .../plots/td3/td3_swimmer_performance.svg | 2479 +++++ .../plots/td3/td3_walker2d_performance.svg | 2632 +++++ .../plots/tf1/tensorflow_ant_performance.svg | 8949 +++++++++++++++ .../tensorflow_halfcheetah_performance.svg | 8718 +++++++++++++++ .../tf1/tensorflow_hopper_performance.svg | 9729 +++++++++++++++++ .../tf1/tensorflow_swimmer_performance.svg | 8748 +++++++++++++++ .../tf1/tensorflow_walker2d_performance.svg | 9360 ++++++++++++++++ docs/images/plots/vpg/vpg_ant_performance.svg | 5082 +++++++++ .../plots/vpg/vpg_halfcheetah_performance.svg | 4819 ++++++++ .../plots/vpg/vpg_hopper_performance.svg | 4836 ++++++++ .../plots/vpg/vpg_swimmer_performance.svg | 4302 ++++++++ .../plots/vpg/vpg_walker2d_performance.svg | 4778 ++++++++ docs/spinningup/bench.rst | 135 +- docs/spinningup/bench_ddpg.rst | 35 + docs/spinningup/bench_ppo.rst | 35 + docs/spinningup/bench_sac.rst | 35 + docs/spinningup/bench_td3.rst | 35 + docs/spinningup/bench_vpg.rst | 35 + docs/spinningup/exercise2_2_soln.rst | 71 +- docs/spinningup/exercises.rst | 53 +- .../spinningup/extra_tf_pg_implementation.rst | 181 + docs/spinningup/rl_intro.rst | 40 +- docs/spinningup/rl_intro3.rst | 207 +- docs/user/algorithms.rst | 51 +- docs/user/introduction.rst | 38 +- docs/user/running.rst | 47 +- docs/user/saving_and_loading.rst | 73 +- docs/utils/logger.rst | 30 +- docs/utils/mpi.rst | 27 +- readme.md | 16 +- setup.py | 4 +- spinup/__init__.py | 24 +- spinup/algos/pytorch/ddpg/core.py | 61 + spinup/algos/pytorch/ddpg/ddpg.py | 327 + spinup/algos/pytorch/ppo/core.py | 135 + spinup/algos/pytorch/ppo/ppo.py | 378 + spinup/algos/pytorch/sac/core.py | 98 + spinup/algos/pytorch/sac/sac.py | 370 + spinup/algos/pytorch/td3/core.py | 62 + spinup/algos/pytorch/td3/td3.py | 368 + 
spinup/algos/pytorch/trpo/trpo.py | 4 + spinup/algos/pytorch/vpg/core.py | 135 + spinup/algos/pytorch/vpg/vpg.py | 350 + spinup/algos/{ => tf1}/ddpg/__init__.py | 0 spinup/algos/{ => tf1}/ddpg/core.py | 4 +- spinup/algos/{ => tf1}/ddpg/ddpg.py | 72 +- spinup/algos/{ => tf1}/ppo/__init__.py | 0 spinup/algos/{ => tf1}/ppo/core.py | 2 +- spinup/algos/{ => tf1}/ppo/ppo.py | 29 +- spinup/algos/{ => tf1}/sac/__init__.py | 0 spinup/algos/{ => tf1}/sac/core.py | 50 +- spinup/algos/{ => tf1}/sac/sac.py | 122 +- spinup/algos/{ => tf1}/td3/__init__.py | 0 spinup/algos/{ => tf1}/td3/core.py | 4 +- spinup/algos/{ => tf1}/td3/td3.py | 72 +- spinup/algos/{ => tf1}/trpo/__init__.py | 0 spinup/algos/{ => tf1}/trpo/core.py | 2 +- spinup/algos/{ => tf1}/trpo/trpo.py | 27 +- spinup/algos/{ => tf1}/vpg/__init__.py | 0 spinup/algos/{ => tf1}/vpg/core.py | 2 +- spinup/algos/{ => tf1}/vpg/vpg.py | 28 +- spinup/examples/pytorch/bench_ppo_cartpole.py | 19 + .../examples/pytorch/pg_math/1_simple_pg.py | 128 + spinup/examples/pytorch/pg_math/2_rtg_pg.py | 135 + .../examples/{ => tf1}/bench_ppo_cartpole.py | 6 +- .../examples/{ => tf1}/pg_math/1_simple_pg.py | 0 spinup/examples/{ => tf1}/pg_math/2_rtg_pg.py | 0 spinup/examples/{ => tf1}/train_mnist.py | 0 spinup/exercises/problem_set_2/exercise2_3.py | 317 - .../pytorch/problem_set_1/exercise1_1.py | 55 + .../pytorch/problem_set_1/exercise1_2.py | 134 + .../problem_set_1/exercise1_2_auxiliary.py | 54 + .../pytorch/problem_set_1/exercise1_3.py | 416 + .../exercise1_1_soln.py | 8 + .../exercise1_2_soln.py | 49 + .../pytorch/problem_set_2/exercise2_2.py | 93 + .../{ => tf1}/problem_set_1/exercise1_1.py | 2 +- .../{ => tf1}/problem_set_1/exercise1_2.py | 4 +- .../{ => tf1}/problem_set_1/exercise1_3.py | 69 +- .../exercise1_1_soln.py | 0 .../exercise1_2_soln.py | 0 .../{ => tf1}/problem_set_2/exercise2_2.py | 4 +- spinup/run.py | 24 +- spinup/user_config.py | 11 + spinup/utils/logx.py | 50 +- spinup/utils/mpi_pytorch.py | 35 + spinup/utils/plot.py | 2 + spinup/utils/test_policy.py | 93 +- spinup/version.py | 2 +- 119 files changed, 172854 insertions(+), 848 deletions(-) create mode 100644 docs/images/ex2-2_ddpg_bug_pytorch.png create mode 100644 docs/images/plots/ddpg/ddpg_ant_performance.svg create mode 100644 docs/images/plots/ddpg/ddpg_halfcheetah_performance.svg create mode 100644 docs/images/plots/ddpg/ddpg_hopper_performance.svg create mode 100644 docs/images/plots/ddpg/ddpg_swimmer_performance.svg create mode 100644 docs/images/plots/ddpg/ddpg_walker2d_performance.svg create mode 100644 docs/images/plots/ppo/ppo_ant_performance.svg create mode 100644 docs/images/plots/ppo/ppo_halfcheetah_performance.svg create mode 100644 docs/images/plots/ppo/ppo_hopper_performance.svg create mode 100644 docs/images/plots/ppo/ppo_swimmer_performance.svg create mode 100644 docs/images/plots/ppo/ppo_walker2d_performance.svg create mode 100644 docs/images/plots/pyt/pytorch_ant_performance.svg create mode 100644 docs/images/plots/pyt/pytorch_halfcheetah_performance.svg create mode 100644 docs/images/plots/pyt/pytorch_hopper_performance.svg create mode 100644 docs/images/plots/pyt/pytorch_swimmer_performance.svg create mode 100644 docs/images/plots/pyt/pytorch_walker2d_performance.svg create mode 100644 docs/images/plots/sac/sac_ant_performance.svg create mode 100644 docs/images/plots/sac/sac_halfcheetah_performance.svg create mode 100644 docs/images/plots/sac/sac_hopper_performance.svg create mode 100644 docs/images/plots/sac/sac_swimmer_performance.svg create mode 100644 
docs/images/plots/sac/sac_walker2d_performance.svg create mode 100644 docs/images/plots/td3/td3_ant_performance.svg create mode 100644 docs/images/plots/td3/td3_halfcheetah_performance.svg create mode 100644 docs/images/plots/td3/td3_hopper_performance.svg create mode 100644 docs/images/plots/td3/td3_swimmer_performance.svg create mode 100644 docs/images/plots/td3/td3_walker2d_performance.svg create mode 100644 docs/images/plots/tf1/tensorflow_ant_performance.svg create mode 100644 docs/images/plots/tf1/tensorflow_halfcheetah_performance.svg create mode 100644 docs/images/plots/tf1/tensorflow_hopper_performance.svg create mode 100644 docs/images/plots/tf1/tensorflow_swimmer_performance.svg create mode 100644 docs/images/plots/tf1/tensorflow_walker2d_performance.svg create mode 100644 docs/images/plots/vpg/vpg_ant_performance.svg create mode 100644 docs/images/plots/vpg/vpg_halfcheetah_performance.svg create mode 100644 docs/images/plots/vpg/vpg_hopper_performance.svg create mode 100644 docs/images/plots/vpg/vpg_swimmer_performance.svg create mode 100644 docs/images/plots/vpg/vpg_walker2d_performance.svg create mode 100644 docs/spinningup/bench_ddpg.rst create mode 100644 docs/spinningup/bench_ppo.rst create mode 100644 docs/spinningup/bench_sac.rst create mode 100644 docs/spinningup/bench_td3.rst create mode 100644 docs/spinningup/bench_vpg.rst create mode 100644 docs/spinningup/extra_tf_pg_implementation.rst create mode 100644 spinup/algos/pytorch/ddpg/core.py create mode 100644 spinup/algos/pytorch/ddpg/ddpg.py create mode 100644 spinup/algos/pytorch/ppo/core.py create mode 100644 spinup/algos/pytorch/ppo/ppo.py create mode 100644 spinup/algos/pytorch/sac/core.py create mode 100644 spinup/algos/pytorch/sac/sac.py create mode 100644 spinup/algos/pytorch/td3/core.py create mode 100644 spinup/algos/pytorch/td3/td3.py create mode 100644 spinup/algos/pytorch/trpo/trpo.py create mode 100644 spinup/algos/pytorch/vpg/core.py create mode 100644 spinup/algos/pytorch/vpg/vpg.py rename spinup/algos/{ => tf1}/ddpg/__init__.py (100%) rename spinup/algos/{ => tf1}/ddpg/core.py (93%) rename spinup/algos/{ => tf1}/ddpg/ddpg.py (85%) rename spinup/algos/{ => tf1}/ppo/__init__.py (100%) rename spinup/algos/{ => tf1}/ppo/core.py (99%) rename spinup/algos/{ => tf1}/ppo/ppo.py (96%) rename spinup/algos/{ => tf1}/sac/__init__.py (100%) rename spinup/algos/{ => tf1}/sac/core.py (51%) rename spinup/algos/{ => tf1}/sac/sac.py (78%) rename spinup/algos/{ => tf1}/td3/__init__.py (100%) rename spinup/algos/{ => tf1}/td3/core.py (93%) rename spinup/algos/{ => tf1}/td3/td3.py (86%) rename spinup/algos/{ => tf1}/trpo/__init__.py (100%) rename spinup/algos/{ => tf1}/trpo/core.py (99%) rename spinup/algos/{ => tf1}/trpo/trpo.py (97%) rename spinup/algos/{ => tf1}/vpg/__init__.py (100%) rename spinup/algos/{ => tf1}/vpg/core.py (99%) rename spinup/algos/{ => tf1}/vpg/vpg.py (96%) create mode 100644 spinup/examples/pytorch/bench_ppo_cartpole.py create mode 100644 spinup/examples/pytorch/pg_math/1_simple_pg.py create mode 100644 spinup/examples/pytorch/pg_math/2_rtg_pg.py rename spinup/examples/{ => tf1}/bench_ppo_cartpole.py (84%) rename spinup/examples/{ => tf1}/pg_math/1_simple_pg.py (100%) rename spinup/examples/{ => tf1}/pg_math/2_rtg_pg.py (100%) rename spinup/examples/{ => tf1}/train_mnist.py (100%) delete mode 100644 spinup/exercises/problem_set_2/exercise2_3.py create mode 100644 spinup/exercises/pytorch/problem_set_1/exercise1_1.py create mode 100644 spinup/exercises/pytorch/problem_set_1/exercise1_2.py create 
mode 100644 spinup/exercises/pytorch/problem_set_1/exercise1_2_auxiliary.py create mode 100644 spinup/exercises/pytorch/problem_set_1/exercise1_3.py create mode 100644 spinup/exercises/pytorch/problem_set_1_solutions/exercise1_1_soln.py create mode 100644 spinup/exercises/pytorch/problem_set_1_solutions/exercise1_2_soln.py create mode 100644 spinup/exercises/pytorch/problem_set_2/exercise2_2.py rename spinup/exercises/{ => tf1}/problem_set_1/exercise1_1.py (95%) rename spinup/exercises/{ => tf1}/problem_set_1/exercise1_2.py (97%) rename spinup/exercises/{ => tf1}/problem_set_1/exercise1_3.py (88%) rename spinup/exercises/{ => tf1}/problem_set_1_solutions/exercise1_1_soln.py (100%) rename spinup/exercises/{ => tf1}/problem_set_1_solutions/exercise1_2_soln.py (100%) rename spinup/exercises/{ => tf1}/problem_set_2/exercise2_2.py (96%) create mode 100644 spinup/utils/mpi_pytorch.py diff --git a/docs/algorithms/ddpg.rst b/docs/algorithms/ddpg.rst index 8c61759a0..1c01e912f 100644 --- a/docs/algorithms/ddpg.rst +++ b/docs/algorithms/ddpg.rst @@ -173,10 +173,35 @@ Pseudocode Documentation ============= -.. autofunction:: spinup.ddpg +.. admonition:: You Should Know + + In what follows, we give documentation for the PyTorch and Tensorflow implementations of DDPG in Spinning Up. They have nearly identical function calls and docstrings, except for details relating to model construction. However, we include both full docstrings for completeness. + + +Documentation: PyTorch Version +------------------------------ + +.. autofunction:: spinup.ddpg_pytorch + +Saved Model Contents: PyTorch Version +------------------------------------- + +The PyTorch saved model can be loaded with ``ac = torch.load('path/to/model.pt')``, yielding an actor-critic object (``ac``) that has the properties described in the docstring for ``ddpg_pytorch``. + +You can get actions from this model with + +.. code-block:: python + + actions = ac.act(torch.as_tensor(obs, dtype=torch.float32)) + + +Documentation: Tensorflow Version +--------------------------------- + +.. autofunction:: spinup.ddpg_tf1 -Saved Model Contents --------------------- +Saved Model Contents: Tensorflow Version +---------------------------------------- The computation graph saved by the logger includes: diff --git a/docs/algorithms/ppo.rst b/docs/algorithms/ppo.rst index 04fb3469b..97d7b712c 100644 --- a/docs/algorithms/ppo.rst +++ b/docs/algorithms/ppo.rst @@ -147,13 +147,40 @@ Pseudocode \end{algorithm} + + Documentation ============= -.. autofunction:: spinup.ppo +.. admonition:: You Should Know + + In what follows, we give documentation for the PyTorch and Tensorflow implementations of PPO in Spinning Up. They have nearly identical function calls and docstrings, except for details relating to model construction. However, we include both full docstrings for completeness. + + +Documentation: PyTorch Version +------------------------------ + +.. autofunction:: spinup.ppo_pytorch + +Saved Model Contents: PyTorch Version +------------------------------------- + +The PyTorch saved model can be loaded with ``ac = torch.load('path/to/model.pt')``, yielding an actor-critic object (``ac``) that has the properties described in the docstring for ``ppo_pytorch``. + +You can get actions from this model with + +.. code-block:: python + + actions = ac.act(torch.as_tensor(obs, dtype=torch.float32)) + + +Documentation: Tensorflow Version +--------------------------------- + +.. 
autofunction:: spinup.ppo_tf1 -Saved Model Contents --------------------- +Saved Model Contents: Tensorflow Version +---------------------------------------- The computation graph saved by the logger includes: diff --git a/docs/algorithms/sac.rst b/docs/algorithms/sac.rst index 84010d1be..6df7ff501 100644 --- a/docs/algorithms/sac.rst +++ b/docs/algorithms/sac.rst @@ -11,7 +11,7 @@ Background .. _`Background for TD3`: ../algorithms/td3.html#background -Soft Actor Critic (SAC) is an algorithm which optimizes a stochastic policy in an off-policy way, forming a bridge between stochastic policy optimization and DDPG-style approaches. It isn't a direct successor to TD3 (having been published roughly concurrently), but it incorporates the clipped double-Q trick, and due to the inherent stochasticity of the policy in SAC, it also winds up benefiting from something like target policy smoothing. +Soft Actor Critic (SAC) is an algorithm that optimizes a stochastic policy in an off-policy way, forming a bridge between stochastic policy optimization and DDPG-style approaches. It isn't a direct successor to TD3 (having been published roughly concurrently), but it incorporates the clipped double-Q trick, and due to the inherent stochasticity of the policy in SAC, it also winds up benefiting from something like target policy smoothing. A central feature of SAC is **entropy regularization.** The policy is trained to maximize a trade-off between expected return and `entropy`_, a measure of randomness in the policy. This has a close connection to the exploration-exploitation trade-off: increasing entropy results in more exploration, which can accelerate learning later on. It can also prevent the policy from prematurely converging to a bad local optimum. @@ -82,44 +82,72 @@ and the Bellman equation for :math:`Q^{\pi}` is Soft Actor-Critic ^^^^^^^^^^^^^^^^^ -SAC concurrently learns a policy :math:`\pi_{\theta}`, two Q-functions :math:`Q_{\phi_1}, Q_{\phi_2}`, and a value function :math:`V_{\psi}`. +SAC concurrently learns a policy :math:`\pi_{\theta}` and two Q-functions :math:`Q_{\phi_1}, Q_{\phi_2}`. There are two variants of SAC that are currently standard: one that uses a fixed entropy regularization coefficient :math:`\alpha`, and another that enforces an entropy constraint by varying :math:`\alpha` over the course of training. For simplicity, Spinning Up makes use of the version with a fixed entropy regularization coefficient, but the entropy-constrained variant is generally preferred by practitioners. -**Learning Q.** The Q-functions are learned by MSBE minimization, using a **target value network** to form the Bellman backups. They both use the same target, like in TD3, and have loss functions: +.. admonition:: You Should Know -.. math:: + The SAC algorithm has changed a little bit over time. An older version of SAC also learns a value function :math:`V_{\psi}` in addition to the Q-functions; this page will focus on the modern version that omits the extra value function. - L(\phi_i, {\mathcal D}) = \underset{(s,a,r,s',d) \sim {\mathcal D}}{{\mathrm E}}\left[ - \Bigg( Q_{\phi_i}(s,a) - \left(r + \gamma (1 - d) V_{\psi_{\text{targ}}}(s') \right) \Bigg)^2 - \right]. -The target value network, like the target networks in DDPG and TD3, is obtained by polyak averaging the value network parameters over the course of training. -**Learning V.** The value function is learned by exploiting (a sample-based approximation of) the connection between :math:`Q^{\pi}` and :math:`V^{\pi}`. 
Before we go into the learning rule, let's first rewrite the connection equation by using the definition of entropy to obtain: +**Learning Q.** The Q-functions are learned in a similar way to TD3, but with a few key differences. + +First, what's similar? + +1) Like in TD3, both Q-functions are learned with MSBE minimization, by regressing to a single shared target. + +2) Like in TD3, the shared target is computed using target Q-networks, and the target Q-networks are obtained by polyak averaging the Q-network parameters over the course of training. + +3) Like in TD3, the shared target makes use of the **clipped double-Q** trick. + +What's different? + +1) Unlike in TD3, the target also includes a term that comes from SAC's use of entropy regularization. + +2) Unlike in TD3, the next-state actions used in the target come from the **current policy** instead of a target policy. + +3) Unlike in TD3, there is no explicit target policy smoothing. TD3 trains a deterministic policy, and so it accomplishes smoothing by adding random noise to the next-state actions. SAC trains a stochastic policy, and so the noise from that stochasticity is sufficient to get a similar effect. + +Before we give the final form of the Q-loss, let’s take a moment to discuss how the contribution from entropy regularization comes in. We'll start by taking our recursive Bellman equation for the entropy-regularized :math:`Q^{\pi}` from earlier, and rewriting it a little bit by using the definition of entropy: .. math:: - V^{\pi}(s) &= \underE{a \sim \pi}{Q^{\pi}(s,a)} + \alpha H\left(\pi(\cdot|s)\right) \\ - &= \underE{a \sim \pi}{Q^{\pi}(s,a) - \alpha \log \pi(a|s)}. + Q^{\pi}(s,a) &= \underE{s' \sim P \\ a' \sim \pi}{R(s,a,s') + \gamma\left(Q^{\pi}(s',a') + \alpha H\left(\pi(\cdot|s')\right) \right)} \\ + &= \underE{s' \sim P \\ a' \sim \pi}{R(s,a,s') + \gamma\left(Q^{\pi}(s',a') - \alpha \log \pi(a'|s') \right)} -The RHS is an expectation over actions, so we can approximate it by sampling from the policy: +The RHS is an expectation over next states (which come from the replay buffer) and next actions (which come from the current policy, and **not** the replay buffer). Since it's an expectation, we can approximate it with samples: .. math:: - V^{\pi}(s) \approx Q^{\pi}(s,\tilde{a}) - \alpha \log \pi(\tilde{a}|s), \;\;\;\;\; \tilde{a} \sim \pi(\cdot|s). + Q^{\pi}(s,a) &\approx r + \gamma\left(Q^{\pi}(s',\tilde{a}') - \alpha \log \pi(\tilde{a}'|s') \right), \;\;\;\;\; \tilde{a}' \sim \pi(\cdot|s'). + +.. admonition:: You Should Know + + We switch next action notation to :math:`\tilde{a}'`, instead of :math:`a'`, to highlight that the next actions have to be sampled fresh from the policy (whereas by contrast, :math:`r` and :math:`s'` should come from the replay buffer). -SAC sets up a mean-squared-error loss for :math:`V_{\psi}` based on this approximation. But what Q-value do we use? SAC uses **clipped double-Q** like TD3 for learning the value function, and takes the minimum Q-value between the two approximators. So the SAC loss for value function parameters is: +SAC sets up the MSBE loss for each Q-function using this kind of sample approximation for the target. The only thing still undetermined here is which Q-function gets used to compute the sample backup: like TD3, SAC uses the clipped double-Q trick, and takes the minimum Q-value between the two Q approximators. + +Putting it all together, the loss functions for the Q-networks in SAC are: .. 
math:: - L(\psi, {\mathcal D}) = \underE{s \sim \mathcal{D} \\ \tilde{a} \sim \pi_{\theta}}{\Bigg(V_{\psi}(s) - \left(\min_{i=1,2} Q_{\phi_i}(s,\tilde{a}) - \alpha \log \pi_{\theta}(\tilde{a}|s) \right)\Bigg)^2}. + L(\phi_i, {\mathcal D}) = \underset{(s,a,r,s',d) \sim {\mathcal D}}{{\mathrm E}}\left[ + \Bigg( Q_{\phi_i}(s,a) - y(r,s',d) \Bigg)^2 + \right], + +where the target is given by + +.. math:: -Importantly, we do **not** use actions from the replay buffer here: these actions are sampled fresh from the current version of the policy. + y(r, s', d) = r + \gamma (1 - d) \left( \min_{j=1,2} Q_{\phi_{\text{targ},j}}(s', \tilde{a}') - \alpha \log \pi_{\theta}(\tilde{a}'|s') \right), \;\;\;\;\; \tilde{a}' \sim \pi_{\theta}(\cdot|s'). -**Learning the Policy.** The policy should, in each state, act to maximize the expected future return plus expected future entropy. That is, it should maximize :math:`V^{\pi}(s)`, which we expand out (as before) into + +**Learning the Policy.** The policy should, in each state, act to maximize the expected future return plus expected future entropy. That is, it should maximize :math:`V^{\pi}(s)`, which we expand out into .. math:: - - \underE{a \sim \pi}{Q^{\pi}(s,a) - \alpha \log \pi(a|s)}. + + V^{\pi}(s) &= \underE{a \sim \pi}{Q^{\pi}(s,a)} + \alpha H\left(\pi(\cdot|s)\right) \\ + &= \underE{a \sim \pi}{Q^{\pi}(s,a) - \alpha \log \pi(a|s)}. The way we optimize the policy makes use of the **reparameterization trick**, in which a sample from :math:`\pi_{\theta}(\cdot|s)` is drawn by computing a deterministic function of state, policy parameters, and independent noise. To illustrate: following the authors of the SAC paper, we use a squashed Gaussian policy, which means that samples are obtained according to @@ -141,13 +169,13 @@ The reparameterization trick allows us to rewrite the expectation over actions ( \underE{a \sim \pi_{\theta}}{Q^{\pi_{\theta}}(s,a) - \alpha \log \pi_{\theta}(a|s)} = \underE{\xi \sim \mathcal{N}}{Q^{\pi_{\theta}}(s,\tilde{a}_{\theta}(s,\xi)) - \alpha \log \pi_{\theta}(\tilde{a}_{\theta}(s,\xi)|s)} -To get the policy loss, the final step is that we need to substitute :math:`Q^{\pi_{\theta}}` with one of our function approximators. The same as in TD3, we use :math:`Q_{\phi_1}`. The policy is thus optimized according to +To get the policy loss, the final step is that we need to substitute :math:`Q^{\pi_{\theta}}` with one of our function approximators. Unlike in TD3, which uses :math:`Q_{\phi_1}` (just the first Q approximator), SAC uses :math:`\min_{j=1,2} Q_{\phi_j}` (the minimum of the two Q approximators). The policy is thus optimized according to .. math:: - \max_{\theta} \underE{s \sim \mathcal{D} \\ \xi \sim \mathcal{N}}{Q_{\phi_1}(s,\tilde{a}_{\theta}(s,\xi)) - \alpha \log \pi_{\theta}(\tilde{a}_{\theta}(s,\xi)|s)}, + \max_{\theta} \underE{s \sim \mathcal{D} \\ \xi \sim \mathcal{N}}{\min_{j=1,2} Q_{\phi_j}(s,\tilde{a}_{\theta}(s,\xi)) - \alpha \log \pi_{\theta}(\tilde{a}_{\theta}(s,\xi)|s)}, -which is almost the same as the DDPG and TD3 policy optimization, except for the stochasticity and entropy term. +which is almost the same as the DDPG and TD3 policy optimization, except for the min-double-Q trick, the stochasticity, and the entropy term. Exploration vs. 
Exploitation @@ -173,8 +201,8 @@ Pseudocode \caption{Soft Actor-Critic} \label{alg1} \begin{algorithmic}[1] - \STATE Input: initial policy parameters $\theta$, Q-function parameters $\phi_1$, $\phi_2$, V-function parameters $\psi$, empty replay buffer $\mathcal{D}$ - \STATE Set target parameters equal to main parameters $\psi_{\text{targ}} \leftarrow \psi$ + \STATE Input: initial policy parameters $\theta$, Q-function parameters $\phi_1$, $\phi_2$, empty replay buffer $\mathcal{D}$ + \STATE Set target parameters equal to main parameters $\phi_{\text{targ},1} \leftarrow \phi_1$, $\phi_{\text{targ},2} \leftarrow \phi_2$ \REPEAT \STATE Observe state $s$ and select action $a \sim \pi_{\theta}(\cdot|s)$ \STATE Execute $a$ in the environment @@ -184,27 +212,22 @@ Pseudocode \IF{it's time to update} \FOR{$j$ in range(however many updates)} \STATE Randomly sample a batch of transitions, $B = \{ (s,a,r,s',d) \}$ from $\mathcal{D}$ - \STATE Compute targets for Q and V functions: + \STATE Compute targets for the Q functions: \begin{align*} - y_q (r,s',d) &= r + \gamma (1-d) V_{\psi_{\text{targ}}}(s') &&\\ - y_v (s) &= \min_{i=1,2} Q_{\phi_i} (s, \tilde{a}) - \alpha \log \pi_{\theta}(\tilde{a}|s), && \tilde{a} \sim \pi_{\theta}(\cdot|s) + y (r,s',d) &= r + \gamma (1-d) \left(\min_{i=1,2} Q_{\phi_{\text{targ}, i}} (s', \tilde{a}') - \alpha \log \pi_{\theta}(\tilde{a}'|s')\right), && \tilde{a}' \sim \pi_{\theta}(\cdot|s') \end{align*} \STATE Update Q-functions by one step of gradient descent using \begin{align*} - & \nabla_{\phi_i} \frac{1}{|B|}\sum_{(s,a,r,s',d) \in B} \left( Q_{\phi,i}(s,a) - y_q(r,s',d) \right)^2 && \text{for } i=1,2 + & \nabla_{\phi_i} \frac{1}{|B|}\sum_{(s,a,r,s',d) \in B} \left( Q_{\phi_i}(s,a) - y(r,s',d) \right)^2 && \text{for } i=1,2 \end{align*} - \STATE Update V-function by one step of gradient descent using - \begin{equation*} - \nabla_{\psi} \frac{1}{|B|}\sum_{s \in B} \left( V_{\psi}(s) - y_v(s) \right)^2 - \end{equation*} \STATE Update policy by one step of gradient ascent using \begin{equation*} - \nabla_{\theta} \frac{1}{|B|}\sum_{s \in B} \Big( Q_{\phi,1}(s, \tilde{a}_{\theta}(s)) - \alpha \log \pi_{\theta} \left(\left. \tilde{a}_{\theta}(s) \right| s\right) \Big), + \nabla_{\theta} \frac{1}{|B|}\sum_{s \in B} \Big(\min_{i=1,2} Q_{\phi_i}(s, \tilde{a}_{\theta}(s)) - \alpha \log \pi_{\theta} \left(\left. \tilde{a}_{\theta}(s) \right| s\right) \Big), \end{equation*} where $\tilde{a}_{\theta}(s)$ is a sample from $\pi_{\theta}(\cdot|s)$ which is differentiable wrt $\theta$ via the reparametrization trick. - \STATE Update target value network with + \STATE Update target networks with \begin{align*} - \psi_{\text{targ}} &\leftarrow \rho \psi_{\text{targ}} + (1-\rho) \psi + \phi_{\text{targ},i} &\leftarrow \rho \phi_{\text{targ}, i} + (1-\rho) \phi_i && \text{for } i=1,2 \end{align*} \ENDFOR \ENDIF @@ -212,13 +235,40 @@ Pseudocode \end{algorithmic} \end{algorithm} + Documentation ============= -.. autofunction:: spinup.sac +.. admonition:: You Should Know + + In what follows, we give documentation for the PyTorch and Tensorflow implementations of SAC in Spinning Up. They have nearly identical function calls and docstrings, except for details relating to model construction. However, we include both full docstrings for completeness. + + -Saved Model Contents --------------------- +Documentation: PyTorch Version +------------------------------ + +.. 
autofunction:: spinup.sac_pytorch + +Saved Model Contents: PyTorch Version +------------------------------------- + +The PyTorch saved model can be loaded with ``ac = torch.load('path/to/model.pt')``, yielding an actor-critic object (``ac``) that has the properties described in the docstring for ``sac_pytorch``. + +You can get actions from this model with + +.. code-block:: python + + actions = ac.act(torch.as_tensor(obs, dtype=torch.float32)) + + +Documentation: Tensorflow Version +--------------------------------- + +.. autofunction:: spinup.sac_tf1 + +Saved Model Contents: Tensorflow Version +---------------------------------------- The computation graph saved by the logger includes: @@ -252,13 +302,20 @@ Relevant Papers --------------- - `Soft Actor-Critic: Off-Policy Maximum Entropy Deep Reinforcement Learning with a Stochastic Actor`_, Haarnoja et al, 2018 +- `Soft Actor-Critic Algorithms and Applications`_, Haarnoja et al, 2018 +- `Learning to Walk via Deep Reinforcement Learning`_, Haarnoja et al, 2018 .. _`Soft Actor-Critic: Off-Policy Maximum Entropy Deep Reinforcement Learning with a Stochastic Actor`: https://arxiv.org/abs/1801.01290 - +.. _`Soft Actor-Critic Algorithms and Applications`: https://arxiv.org/abs/1812.05905 +.. _`Learning to Walk via Deep Reinforcement Learning`: https://arxiv.org/abs/1812.11103 Other Public Implementations ---------------------------- -- `SAC release repo`_ +- `SAC release repo`_ (original "official" codebase) +- `Softlearning repo`_ (current "official" codebase) +- `Yarats and Kostrikov repo`_ -.. _`SAC release repo`: https://github.com/haarnoja/sac \ No newline at end of file +.. _`SAC release repo`: https://github.com/haarnoja/sac +.. _`Softlearning repo`: https://github.com/rail-berkeley/softlearning +.. _`Yarats and Kostrikov repo`: https://github.com/denisyarats/pytorch_sac \ No newline at end of file diff --git a/docs/algorithms/td3.rst b/docs/algorithms/td3.rst index 677a4aada..eae816b91 100644 --- a/docs/algorithms/td3.rst +++ b/docs/algorithms/td3.rst @@ -11,7 +11,7 @@ Background .. _`Background for DDPG`: ../algorithms/ddpg.html#background -While DDPG can achieve great performance sometimes, it is frequently brittle with respect to hyperparameters and other kinds of tuning. A common failure mode for DDPG is that the learned Q-function begins to dramatically overestimate Q-values, which then leads to the policy breaking, because it exploits the errors in the Q-function. Twin Delayed DDPG (TD3) is an algorithm which addresses this issue by introducing three critical tricks: +While DDPG can achieve great performance sometimes, it is frequently brittle with respect to hyperparameters and other kinds of tuning. A common failure mode for DDPG is that the learned Q-function begins to dramatically overestimate Q-values, which then leads to the policy breaking, because it exploits the errors in the Q-function. Twin Delayed DDPG (TD3) is an algorithm that addresses this issue by introducing three critical tricks: **Trick One: Clipped Double-Q Learning.** TD3 learns *two* Q-functions instead of one (hence "twin"), and uses the smaller of the two Q-values to form the targets in the Bellman error loss functions. 
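The clipped double-Q targets described here (and in the SAC Q-learning discussion earlier in this patch) are straightforward to express in PyTorch. The following is a minimal illustrative sketch, not the Spinning Up implementation itself; ``q1_targ``, ``q2_targ``, ``pi``, ``alpha``, and ``gamma`` are assumed stand-ins for the target Q-networks, the current policy, the entropy coefficient, and the discount factor.

.. code-block:: python

    import torch

    def clipped_double_q_target(r, o2, d, q1_targ, q2_targ, pi, gamma=0.99, alpha=0.2):
        """Backup value y(r, s', d) used in the MSBE loss for both Q-networks.

        The next action a' is sampled fresh from the current policy (not taken
        from the replay buffer), the smaller of the two target Q-values is used,
        and -alpha * log pi(a'|s') supplies the entropy bonus in the SAC case.
        """
        with torch.no_grad():
            a2, logp_a2 = pi(o2)                # a' ~ pi(.|s') and its log-prob
            q_min = torch.min(q1_targ(o2, a2), q2_targ(o2, a2))
            return r + gamma * (1 - d) * (q_min - alpha * logp_a2)

The resulting target is held fixed while each Q-network is regressed onto it with a mean-squared error, exactly as in the pseudocode above; for TD3 one would instead draw the next action from a target policy with clipped smoothing noise and drop the entropy term.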
@@ -116,16 +116,16 @@ Pseudocode \end{equation*} \STATE Update Q-functions by one step of gradient descent using \begin{align*} - & \nabla_{\phi_i} \frac{1}{|B|}\sum_{(s,a,r,s',d) \in B} \left( Q_{\phi,i}(s,a) - y(r,s',d) \right)^2 && \text{for } i=1,2 + & \nabla_{\phi_i} \frac{1}{|B|}\sum_{(s,a,r,s',d) \in B} \left( Q_{\phi_i}(s,a) - y(r,s',d) \right)^2 && \text{for } i=1,2 \end{align*} \IF{ $j \mod$ \texttt{policy\_delay} $ = 0$} \STATE Update policy by one step of gradient ascent using \begin{equation*} - \nabla_{\theta} \frac{1}{|B|}\sum_{s \in B}Q_{\phi,1}(s, \mu_{\theta}(s)) + \nabla_{\theta} \frac{1}{|B|}\sum_{s \in B}Q_{\phi_1}(s, \mu_{\theta}(s)) \end{equation*} \STATE Update target networks with \begin{align*} - \phi_{\text{targ},i} &\leftarrow \rho \phi_{\text{targ},i} + (1-\rho) \phi_i && \text{for } i=1,2\\ + \phi_{\text{targ},i} &\leftarrow \rho \phi_{\text{targ}, i} + (1-\rho) \phi_i && \text{for } i=1,2\\ \theta_{\text{targ}} &\leftarrow \rho \theta_{\text{targ}} + (1-\rho) \theta \end{align*} \ENDIF @@ -136,15 +136,39 @@ Pseudocode \end{algorithm} - - Documentation ============= -.. autofunction:: spinup.td3 +.. admonition:: You Should Know + + In what follows, we give documentation for the PyTorch and Tensorflow implementations of TD3 in Spinning Up. They have nearly identical function calls and docstrings, except for details relating to model construction. However, we include both full docstrings for completeness. + + + +Documentation: PyTorch Version +------------------------------ + +.. autofunction:: spinup.td3_pytorch + +Saved Model Contents: PyTorch Version +------------------------------------- + +The PyTorch saved model can be loaded with ``ac = torch.load('path/to/model.pt')``, yielding an actor-critic object (``ac``) that has the properties described in the docstring for ``td3_pytorch``. + +You can get actions from this model with + +.. code-block:: python + + actions = ac.act(torch.as_tensor(obs, dtype=torch.float32)) + + +Documentation: Tensorflow Version +--------------------------------- + +.. autofunction:: spinup.td3_tf1 -Saved Model Contents --------------------- +Saved Model Contents: Tensorflow Version +---------------------------------------- The computation graph saved by the logger includes: diff --git a/docs/algorithms/trpo.rst b/docs/algorithms/trpo.rst index a04b04fc1..f0d701514 100644 --- a/docs/algorithms/trpo.rst +++ b/docs/algorithms/trpo.rst @@ -154,7 +154,11 @@ Pseudocode Documentation ============= -.. autofunction:: spinup.trpo +.. admonition:: You Should Know + + Spinning Up currently only has a Tensorflow implementation of TRPO. + +.. autofunction:: spinup.trpo_tf1 Saved Model Contents diff --git a/docs/algorithms/vpg.rst b/docs/algorithms/vpg.rst index bc4e06a2e..cdfa2a836 100644 --- a/docs/algorithms/vpg.rst +++ b/docs/algorithms/vpg.rst @@ -85,10 +85,35 @@ Pseudocode Documentation ============= -.. autofunction:: spinup.vpg +.. admonition:: You Should Know -Saved Model Contents --------------------- + In what follows, we give documentation for the PyTorch and Tensorflow implementations of VPG in Spinning Up. They have nearly identical function calls and docstrings, except for details relating to model construction. However, we include both full docstrings for completeness. + + +Documentation: PyTorch Version +------------------------------ + +.. 
autofunction:: spinup.vpg_pytorch + +Saved Model Contents: PyTorch Version +------------------------------------- + +The PyTorch saved model can be loaded with ``ac = torch.load('path/to/model.pt')``, yielding an actor-critic object (``ac``) that has the properties described in the docstring for ``vpg_pytorch``. + +You can get actions from this model with + +.. code-block:: python + + actions = ac.act(torch.as_tensor(obs, dtype=torch.float32)) + + +Documentation: Tensorflow Version +--------------------------------- + +.. autofunction:: spinup.vpg_tf1 + +Saved Model Contents: Tensorflow Version +---------------------------------------- The computation graph saved by the logger includes: diff --git a/docs/images/ex2-2_ddpg_bug_pytorch.png b/docs/images/ex2-2_ddpg_bug_pytorch.png new file mode 100644 index 0000000000000000000000000000000000000000..5d06cea4d46bcb8b3cd1575e0c73095f9026b41f GIT binary patch literal 134148 [134148 bytes of binary image data omitted]
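As a usage note for the ``Saved Model Contents: PyTorch Version`` sections above, the snippet below is a minimal sketch of a full test rollout. It assumes a Gym environment is available (``'HalfCheetah-v2'`` is only a placeholder; use whatever environment the agent was trained on) and that ``ac.act`` returns an action compatible with ``env.step``, as in the loading snippets above.

.. code-block:: python

    import gym
    import torch

    ac = torch.load('path/to/model.pt')      # actor-critic object saved by the logger
    env = gym.make('HalfCheetah-v2')         # placeholder environment

    obs, ep_ret, ep_len = env.reset(), 0, 0
    while ep_len < 1000:
        act = ac.act(torch.as_tensor(obs, dtype=torch.float32))
        obs, rew, done, _ = env.step(act)
        ep_ret += rew
        ep_len += 1
        if done:
            break
    print('EpRet %.2f, EpLen %d' % (ep_ret, ep_len))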
z;XwfnIq-4nu<^zy_R7y(>#V-;0=5)>YZeau`nW%T_^pTg{&KU3x^GXnMvz8AbKT3p zZY2G@vckmnT$C0CW#mBgqqNSun<^^twLW9Cx-~Xm$PIjg{Vp)bPDI1Q3QT^QnLK=7 z8X~G6TxEoFYT6-&P^`VYIEx$?zdCn*@4c2T=1m4=hkV@4&CQE zkGB~w%6c=zY3JtVB)xZLTe9$;htP3ASn5U@Kh99WW8 zPeezb9KgQhx0@unIK_W>b%{1u_=a^Zlqo&l#;MusALUpX=#wOpLG4sH| zg24s{7nfIh9PMuyV|PXjFdhHSduGj)>QBsWZ3O@`8ZB^UV6iUM`%7Nk6`K*%>?_Wi zH#T6+s7%AU#eANB=ia>+KR-CiKYB#g_w1uOC7@4p?ZwtniS5P1-t*om03Xt-Jp<(=bqj~@PXxOOqe0;p#D@(npE58eg z$3NT00;8c57N$(%(3ezKe{1msmz*Q$%^S7m7G-IZn^x6StQu^D*REuvPsQ;ldGvnw zx3&`6j#tU03T#&Ro{`y%eL{I?HoOg+uEM;3+w<~$ckED65^CxJ z*T=`ka!4cCu^hkLlpAiBo({zCl_%AmjgaLYHMaC9;6wJSk%73~_! zU;SaRvE<|MUhNxWdxns@IK$cYYY(_~5yMWE;O8~wU zeA~d)vZ3R(3Bz;$m9K0J1Yoqqzhxylq^?ewj6*Lh=GOfsVF5zY_tb)=40jgl2DT>a zik|FnHuQ_rhR57$z}ke4p;zlLY~hT9;}w!BI71>}HCj&Cd~tZD7a8NSK8zNKfloZn zlY$47ICKz-09kH+K6cmj>oK`CVb@h8azEVpbNshmvOzWX`|%v6BOH6FSGTQyTQ{o& zIIjK7d&zW0LPFA}krY42Z$q_ zeuG;F@ZjLG=F{()6}~<`4nQICsQ9pe?s}air>5orb%M={rBiKXy3;5oD(V3DF^ZBG zgOo*mV4~~mS9I9M?X7{BgAMNXfnE2q+Z^ZGgjKwccN%eTpnNGeye+*6g@eNO2{~uP zGM~+8*v%ulUaNsjLoQzLHyeHzr_XYFp}>Ey50)SH4qa)P|Ct+^jd$GtfI7-)(dqhS z({Pa{t&m8wcB|7x{ohLrB0`nF3Tg#}8EG+^+^W)y-{~K-ib=E-A9FUSe*N(S@8a}8 zs>SbufQl*to;<^Y2Q)arsDSSr+}-7Vcz9$w{^@jZb!8tI zl{WDK%0>_)Cd$&*Q}j_{?E@2Rk}+5G-0vE)R1J8^^CW)H8L;c#)GkpE@WfZfnX z{6scwY&zbOCLhoHfKBNEFl=qnoSeR6V`B?OWU9Y=S7z<3R+>htr>7SmM{t9YaoJ~+ zJ-NC}=^p=h8qV73L6MO6;T<@a*xFhx`8#HC%cP)w4pc776+CY;3+QV0KDz$oy=chU z(Kelk2o5DBC7h#7j_h`}T@D{*(9Mdp zweyM}SCw;x1_s_<8!m}Gnz4_Bdq>Vh%=6&Eb-;tI(bOY5YeS0A*}i=JN&@?$;8~FX zZ0=|%V9WKzfbq&4miuhe_W-SRgcHjJyV)H{=;%;EkL*gnkK`;Z@*q?wDDeEY;s;(f zx3-33Z%+$w^E%XTL8eNbjQj3(e>TQ`AsK_CV+AkyG3Ax&e1NvW5k`_V%H_EkFU&|mT2 zCj|$4eDkrgVnKTe4IoUiFn~?}nrA)L8V}KaPXx+ChHM5h0GmacoL#`he|fSoGZQ{~ z6pn@C6@iB7C$qP-Z8-mRdu~)?pc-`y=J;os_dlQz_cID;36To*>l&|S1*u17IVUHF;MT2R#Uzf| zBm)7%)BUjmgBHX0e@snGUO;&}K3P6^%0!agHUH-iAuVkzRPPr@Tem<@VWASRQJ<|N zPaGIX(yFrH0N9?hH`=~nUBLw^x3>{xEmzlUrW@%++#e{jOp0^u!%y`_kH0sOe4xn-pUb>YGVm2Y{G#|(<8@HBUIG6V z%Ts5eOHlZ`_KXQ0MGRC;qx0i!kU4Mzmx?rsDr_grHYPq}uCA_%#D<3U5W*ISB=0xu zNyMk%?YxmLaze%D=4Zi21q_(@>~NECb1^MJ{T)sr9}pKzP;QM2^u~H-y^pGcf;`x? 
zc?`O}dRW*tNZZ<;&y832K%Hujq7Kh89(eNk^zcSPwN> zHkztb@Hk)j;TP8^ssrJznUgooxF$)HG^hU?0T9TJ zB!lZcf6e#a$>y{I>;*n&6xekg-#47_cE}3;rK$TuH~o(^;b&DX30a5RSESJ8eqvvm z9>IO@jJd!8sQ9BgL!+qUYdA5`0flss=a5u|+n5K+_B-pA?8E(4V}Jva_5erX;>aoP zAF`@E)XBJYzsBfvf7=P~a`nyY*HImj%Ydq8@S8ie-aqh-nrp|Lo9}QIX%+XClzjfB zf7=@PPIu)tY}oc%#~hbe{-@_>+rCk^?kl*otP_)x27_NPJCKEj{Eg>OJwH711Ht&= zdtWtG)t$}H#-K@QG_|&D7#U@Admp;kOdhMf_uZ4qdinxRqTKP1vM|QgzUKi8iJxE2 zUwHa~r*g8gv+HFo1Jvg|9QW=~y}5TXH{(ycFgEg3JJ1c`u=L|mV5wT6dUlf?HJ?S; z_XJnXq2%P`FRiVEvnN1oBQi7dB`2n)0%cr4{9wve2>hykt<5MmEGmg!JCA9vRs1C- z)F+vxlRY}6_oC1DJIkLpVI1%O>K`ajF&>@Jdn5lI*2Ab!9%(@op?>)T@iSXyuiSN=V^Ea|s*C5InFNb(CHi4yKL&N$&};GpgL4SnRB}PTITR z4+;*R&(F|6>BejG+aK&&8LJd*Z2b6;Bk49Q!5aG+7WG(-qM>9lZz?DkDSuus0c)Qf ztuU6FT4Rz@q@HetoxZ|#+g$|v#j~#Is7gTDB3cc z;|~g+wxV!z5AGa^$HL{|^vtWNsg>JJu>kE<+V&8%(`#Sq700#3#Z~k<|Cpm&d{5{2)UD^}bvvu39d?&?QhI%Z_w3P3?1G2BkI&8K zvn^_9j#$e#Z>0lQBS78-0M5|Gj*n(A0Y6h;o-h(TG)tU<76;3&%)#-S=Z#tCWuD3e z;A?|fQjGf>V<4#g4vLOuXJBY^Jl)4WKV9bl9R`u2JpjT&Kt{$T+elRvpVB+NCnS2V zP;6eJNDX{4Wv#8NyDcVmi}SgO+1Fe5-6)_CLZbv)76Q0py!!0n_9%SKYo^q>Ibq+N z1dwKB#GVcaI;L{J0-PciY;5P{zS-H<3rQ~Dky2Zby}Qga@X4aoHx-kWTzGWrt1DV6qj3=d3po=Y~K(p zSQSFJretB~2&Uts4srv~Ck+Nb27&eY!p`oxoE&47^$1FlW?5*)%x6ir40ViXl~TW#KhL;DP8WN6pJAQdqHO1Ev60joO+}@)v)l66ZfYA-{{1xc z1}%|quZG6(&cQ`__wwIg%0+Q$=l56(0UM3h*mC;#ijy4j?(Y`|!L53++~*6IRxdj@ z51QG(vjDBe+mk1_&^@p>fzlhnPXL4G1iWHsXow;x=)S*ofergD7V19viZyV90Si9R zQ9C0@*P(pPIo&v-+|}F-x+f&_Vj={)8}M2PX11LO^qz*pC=!-=xEZdEg%LP!yTs( zutEQ*S;p+c35zZiwx)4UqJH-5atQwV&c>LUuhg<4_<~RcZdd8+PeDDk+us()8Y&uj znUFvVwluue0MOOrlgD3`#cjzp6TrR^Z=3cCp%C7+E;eW(1ys=ndK0+1NFK92o$=yN z=dJAGO^{m{m{!Jg{nN`#@uzchb3vqu^nC7oy6-qQ+SW!y>oagtvP?+efX1lYeDbgz z7Jrl1f#AilkVvZ|Fszgxo2d7pFc)OGc4bXZXPUd(`(7o2FSQNatea zB6wfmQ!~ns?(UZmcj$dXustrxG&s)~|-irk%aY_Zj`pTG%~_r60l+{8qK)RnA>8B`X72#89~Dm{wAlin>7$s5I{n zt*fh3X|5SbvDS{hdE(lP<4Q3Rh2=x=anh z_tW}hLK<>?Gq$R#O0VmQvT_%#{|5PZP0eD3!yL}&*x2gkl}H?q$4>rvk#)lgx$McaAl5vaEP4{cPk7(141n#?EGU$YeI?lhwk)X>MR|TId=Qhv$k?Ze54=zJD=eN&5<(^-w!6b9_Ij zgZdw(UAbhqyYLsmBOWH~Fsm6wBlhICGDp%Jlp|bQa&k>J{d$%WF*>0e2NO3;~Ei zQGkLs+T=~`ckXByGeQB}FU`VoFPYm9C{*9UJE$#S+{6gEvN#(wMn&PFV`8?Bjos~E z-aFU~0ih}7&mZVj6haP6Ah_L51BIH#YPdK7YVpeC7&*9)B%dDkrwIr5r8g{%8n7hH z1FMh(y}E|Up!r<4mtIsV`ymU5Fbh}6%zR= z^f@Nsr@g<0k%sNEShk>|E%uO0<*FKPO{TP^-bNfRr9P6Kz}+4$K|r~{Q|Sa9ZqxP9 zj7330q92J&2A{jSuKjMGZOtG*(d$bSzGj<IZRulH9P9A;juBecMlr4sW#lz4NmSEVAG(d&?p&AWSILhg58eN_cl z`U79d{c#!|6OWkImJyIe2J-tE{}s7VFJ4Jme;@Y#eZoM~nVe_e-LoU`0VVVMGyE?( za76r17AXP`&8>$%@&ixjtO5<`LuKgFw>Mof6RdbN=XDa-Lal~&WzP=nw+eyTL?)_{ z2L%QaLno3B#MsVMNce~}wj@SK7hFw@j9ziJoVSRctu|k}`aig4YHp5X{V*q)OYWSA94wwBEiaj0;^~=_tQP>8r zK>VjrN6;5NZ^A3$n)vw4`YoqDBRT;p>Lr<&9|$Xh)_JfsF`}(aO@l#xoh@s*P9^lG zZ6s4dN=s*{d4>W3GG5qtVWDdJ1Msorpd!2WE#E!!^Vbf%2xjKm$#bwVGBR?wI{`KU z1%R=Dq9Wb*L(fwAEX|%?({MkE$gWZUsLOmlTq3x~u335$+OXw-Pr#3+Nty>g*q+C~ zd-n<&Lt9sV%+B%pg^q#sL`_qxam$t8eZw*-kail}uegOQ@|A&N4dyELQ91}Scdr+K zw(H>D->~+HUOeOTV2Q4>UZV%i!zDLrNDj$BfnV8#vX&}I|K!P&6R%q)MY{8+i2wUaE-|Eh`e z``f=eJ3mGa_^rKgyHpGLy4Ety%*+69#V{4!U8nH?fRrq%ef8kgiR)aOcKWM(@TOYL zKDHI6mQzh%@fdj+RwAAHFAqDa73t)d&fC>w7&3_}&Ol!OF+h*jLyY1VwZ;8a&|Ea!*X0@A|H?ned_=QU!|12f9> z)Ku^0>25F2XQZYfOC8_7jm;K6L+tGDOTU;+bp!ll^}BoW2J6{tWvM_cgv+5TtKGGx z3?~-V_@@3!bLGp=f-jhW^bk-|hKGgi=p~s`@f=RP?pe%cn}8_9QC3J6h~Qde&knaG ze0F~+msbj${NQ;;_wCET=|Y+=hqi%yGY)*mw4CkDhF1%m|*2@!qO5Lzo#{np<1r)=icx z+ksZ2?TsMg^JWr+h{Wh;`y1CGJeHiiB6_BHhbUca6=!0ixdz{~c)P6PNan}seIb7S zp#q7K$-}aqSb$PCGtJjP5*|G9Tvdx3Q88@1#QmuInMM-9b^A6NvUY()DdYQ`To{Gt zNR&-JK8Bbv#^~GYlWaCw;MqS?DqpsWGcepHdgZsu@`06NpO*{+5gvwjdxD 
z@w`?@ZJ?tAq^L6H15yfhcHFWECqGucW%If_2n)V>CA085%jcX{)fNvdI>>6?n!LYzcxRw4u*w?_ekfj{FDg}$d#&+ zAbc)G<*V>STY^9G@bHv-?)8t>?U4_9>gwvUX_d3}M@&gezZJBV#egtO_pfL2#sg(f zxghhV-u;P_I8DeA2g0+uO#P&^w4sYV2QA^c4Q_Wq%uihB(f6I7pNI6M&|oSfowIUn zFm4jHskWovw?!VMbdg8u5NZd86OgN@+1^{?ad_Uta+Y^`GIjjGIwa!(m`-g(#=bIl zr_DA#Z)S-7VOqDobG9yX?@jBEALV$Tic^A2AA@e+eBoBiw2n0Y*mW91AtjRyf1w7T zLJ3tN%p4eqW5}>R%*fK?;M^5nkUPX1zVTosO4vDyc;owJQ-NAOjez~M;oAz_`OAxj zWPa<9Oyyf3A{h|IFSo?Zn|Rzq(z*ofkA^L~<8fgT{QsL8vc*^P`@vzUy{SeUY_ z8rnpX^^$JhAE|fMg&-I7Zf>&quy(J7IY9E74W+W^siXeUa_c!uic^7eI zN}q0sZ-m2NONXm8Fn)~BkFVOnWS3cnh92U%mq?=cP?Z++BK3k zpiI63hl@~!zDOS4A|_wmf`bnOSe#)x=^d(8L{iVO6Yi9#Avl1poA_ddE% zQF!#sD#U`gH{*Ao%=hf@D<*AKTYkpjPhAQ+QBgeLX+-0RiHXTPruUSTl%O7v3_ATT z(G>)hA*GK(``eZO0$85C8Y0ela1sJUguNpsbQ!@ z03>)V^s05DK~tpo{kiH9jCa6@0_y4_^$ zb-KN1aO{eogt!=ftCvevy4Mal5&UMii_;+rp>j?UHy_$2uM;4*k~I~nVdKESUuJ9hzpD$uz_Q-0A=bY<(;Iu=!xeIWVmpxzt#^;syZI7N;)0zAHvl52 z8bX>`lGg@9jq0=kDx;#7M)sYYy0yQ)TjWyX0SRgSGTw{%*9Mpu3vqcxne4%|JPD-8 zkr~o}=S>gDViFXt0arF2a0O3}JTb6iX7^r z&&7e(97N-SA&MXVk;3dV*!3Wml8-~4h1cvnELQyTccv$K2r}(=X#1C^0(O5ybIt2K z#U#7(sw$Fk@Z+TQ^lVB;KIG*Y^}lbdWc!!mHTT+~d=e(|(?BR0Xp{H7HxdY8Dj85NnRWbHz@rcRCvCFCzwQ^puDg%9};7y@`-6Xop>QDBrIuU6V6jQ5$K{zlZ=l^;cbxvMR|F9tTJ}`6`NFu z4?RvYWD%A3srr!UTe$go4=TLhpwmouw?o>4bZ@d1Ao^VHTWsljodpcmVG*Qae_Jur zgWVwgXx94|dPdm0#}Iy(@fL!$MFzpA;fa~= z-*tqZJQ}2>E7hs4FYujf8@uUGi%4l99xl-h2D5y~v$){7TqlsikSFOr!H4!OF#d4W z`;I>O&FClvyW&>C-J`GV>O-?O;m0q>OqHg$K`$K zqe9jN2j~P$?+8m>TE>*jF)T7t=Edw|2)t3w_=`)t4MyID!BTz9!CH`gK%$k+vbD2| z0T+3V~KzFfv=IX^4KF?#Fg7D z$^eBYzF5M=olfa=Nrx~tYnTJJv zW^QW1o%*^Dq6qmctFNwALTjf2c~2tiDIGgIepVK}kl#5LSh5O?>qphu#cxFo+uXCG z7OY1&jJxA5g?x?)va_=@HHzZFZH2rQ(o=;~lUrHe*m$Lq6CvR3zV=(&!O4kiJRm@j z+v2GK_yD)S3rG4*%*@Ol=g0Ig!xIi7BHeed67_Gmx8?jw+&4f*&jEP&?^_4TZZx%j z8^CS)q#pQ#l+|IHf-(@pmtnlDOfndULj1Df=JM7?n(Opu2Z(lo>D9f+L$9S-ZN={a z2F%4z@EIYKcmB7_50)j4cr4shqob`&3enmOCN%D%pL&iBx}OC*@;;dfXcTGaSGk)$ zIouedum3zbCB|htegp54kkB;esa0Iu>zHyV9YhQA@vK?Qxw`V_np~mjVFh7)mOt@$ z&HI1?J7=n!H5-E3+twEByxRyt!=+@AXK9-`_RQPJV4A&TMNzum z$fvr`x+mrLpFfEdlS)GXfMVhIPUf&e`owy;_{%$f*8S(dz#If`akejgqAtc3R6Aj3 zGNbvgjv!lV-UOm$J)A<+~6S{NvuHoi*0{Cz+LN_%f z20p&X=;-LusQLa1H<)(qpekb1IftKZw_GLup|OC(J;b)_R;pjvJ>L(u!0Ca@by$pO zdAh1qqG7x}o$g7tS*TMj^4qF9&XbZbz9pj3zP+!H22Q;bZ0OtQFn-ilVe;)8D)Y`ZII#zb6vz{vX*1S8hKhK-xNm2*ZdzNohrq| z;!gOnhYk))G)0i^XmGP_2MrJAnwfQ=iAYpk-2vd6|J0IBSyL*p^p#Hg3j<3vfd?Xt3@mp= zTz5N84|E4ITkm5Wd@e8lvNLC3H1>%f+&Ln#8q*$(x#)Wcn6$$U#Pol^?tzARU8eV) z{ii{=KQaMn8v!-1LGdig{ls9Z(bvZ@3RY~r%W9%I*`txtDQ@FVo4_s*pD1T+2jgmnCC*T-fvJ^g(1KEDHkFc`oT^o&Qu6qRk_zezsR$8XCo+(mlv2udrP-0el9$INiQHEi^WGC zOvP{YuYW_T#FVPQhC3438=mIK_A(@=m%ih-hOA6FgxXSDDL;U^*_rovVs7!--4Q=z zO0;~A+-LWZcwo^7LGbNT1#IQOP;%ZBbXpj3aQbOLDzZKkfOOryy?!Qq*jox49~tV0 zR8eq&){yuWdg9tl!&uU(!W|-))m_8)BiAsd*{XXoKpcov&6P0vh&orHD&5%=$EL=0 ztf|0_76;q*oS}wGBfBR|iQpnRec5)XSCPqdi$8hG zWxb*WC>=nJS^Imx_5S$5W%^wSpm1IJheHt1EPmzPLMGe7!npqURmkp$-2PZqSzX;x ztnJJV&gD%o*dcE&X<~v7*)lMdpF(oeWqTU;@jJerqfyvGJA0{s=(a*{%Uymg(M9oq z{0K_|(UT{AJBNo7@O%gJ9yg$J8uRk=2WQMkL#m()qO{0~C3prHd3kvSS`|c8f_4$G zoUb=P>CLglN?3H*bK-p+Jljw1QU{_&m#cB{~O0A#`=OM~ z1ziBY9bhp51;tCSBOxq>o}hpYnuvO_7WToq;6ng|1-hS`CH~ac*Bh=6Pl3;c32Y(c z_*ko8$O_!S^?lo)A1DvEF2E1!naOoqEu1xOp=M9W47p=(c*6R4B|P6`9H8Q0dBGo7Pbtii0; zX=5fsyZj+T{48Xg3zx&2K{U@QWo(ALa+|o)2SbaqN(_qtAvps3Xt&3Dq4eS zA?qpk%8@}E10BSS)jwE`91_M{TyV}KBTnPuAf$?)bLm^8CMRP(QwE_njL97w17@eY z41Y|qA`=+s`O{lluIS8=A2Xm5f4Y*R>~Q60CUFDi@xX1!0g-TJvW^v`h-}Ggm`n*c zLBUOJkP&j6{ZjU?X#_yRf<8WLBqRICgDZ$eAG+pcP~qfaP`)?`y%Isnge&69hnu=P zduR1eGyZ^}5iH_oIb3b+KG}RB2on|85%P+P$XL0<&TN2anJt8o!Ixzd5%j;b0ST?@ 
z=@Jg&KLc90wm!&7IO72;UOG*at<|KyOj^@+1RdNA(z?5fiER@T)YO7@k~;51`!6Bm zK_Rk_i+589ohbYRmKkyqRecv*qQ!;uCn?@3a^4rF`?DF$t*jy-a|ZOn55{=f|CRRz zB^qXaNg`Yz7T*qi5TtqOy(MlvfGf zt0hRT^zWB?8&E0v*E~CNovr`6Y~*013U=!kT2c(;t^W5bf)dSNJ^8<%SHN-)?_5RY z1tRW0-!BNu1@QiwCB9Uh{78L8{3R$B|9)jNw85b;|9dDL?RwQ{lR_d(I0Eiq9D~~kY)y&edXluyk#H#KRofXrvOF6Rn>p0~yC( z%HG(UbL}l(!w7RC9A!g#`up_&0xo{PizGgva-?@bF8vj^aWnVnO&FHHCf-sOd{|{E zKHGl7o9e<-ffr~mgdKPO7V5I~8vux#8y#&)QuqWl8dBbBfL%EtIt_LwPWV@3$Q~Jj zfDS5!4B1;->lW{UMk4@|KrB^ddUemDD(PADVL@D>A|D@<9CC^;TgG{3?O8#eG1Pkb z$B!i}Ruo~2laR7{%{Of4WocKE1&ic^Mh791SeU8L>N8d?B!NG3bB03hTX=h7MS~?-qEW4)u$O$I{pTDGF#%L|<-N}BXR#+{c$V{gQ>+Smkdze|Oj}K#ZA*Y4T9pi|tUq_nJTQMYnG<6*RqLqiVE7%rx_RyzF$vaYoKf>d_FMtq_)7sJ*otiBh{ciTJ2Qr}# z)$zN9n?dPaMZflwJ#(OIFtn&W{Og3F5k4e9{8EUJK?NuO5}c!*J>5uvB8HdeGVr^( zqEeFG%W?9i1bHg6u|C>N2ze}eX$glNso+@ypd;Ln3;PWtIc8={@{dX9PWJMJy!HuR zy`n)NVD=W2gKGdDd=8$mM)xtZZJ>d_(t&g7>EU6tG7!JBYlRMOv5CotKfS4%LXPtk zsAy$eNI?f1ehKcOL5?%Shfj5$dsIbYw&O}rrmLdNq;}EK)0o$peN+I0z$a#qXzcWN z1mrUxGAOaSD=h*5lMefQn*no8vK{+abEAcpZRiftkmIjvCOpRBhN!WnyN= zwb|^83DHUIVqZ*vnuEpKihS0?M9|7RKbf{jar9qwO)d3O57gLV!iO|i**pnSM}FGF z58DSBvCo@M6XtBNn5lI?U*3RGJ|)Q9xM|foFz+AWC!e9CqChr32(Bkf3X@jD?KA+* z8wtkL%eL|k>WC%ZzL7VTe?)wnoi`NKe<3ngZJ3xYh(NU&mNl-S;dY7}I02-u{DY%! zov}kSqOA&M1RM_5?nd`A-&wc?II^pXns28KK6gabW-uNCphjQAxRve$@n?K;o>L)A zs7h&SYRdDOoxOy`hVYi8g2EkzR*;EPY9bX$&kw2ZA8TqspMz|=vXU-@h6b`Oaz&IS zu>^?<@xqz4!=jYstn!jy!KuljJe0(uX9GB9R^1hSmiWujxYMWIYNpv&&Da?S&!pe- zN3wlpPR{9V8#sl5pWbd`Z^02yhCI|>2e;mkHRHiHE!#1K&- zghV9E?y*ed(jSl-lqLoTDZP(qR8tDeU$owY5*}~q?ftp+0S|uua&FoAkemC_)y5GI56{rr8pe~Pp-mK2e}2*0yt-;hm8$k=s?HfV ziR0QV>}PrTFwBfHVCCMh3Q-}w5|FAJO%!_p;da<9t(E4ngw)h%b$ZvSsqcGu@=Nx= z_f;u-?%QX4a&{FAgFGk*m;uCx7zpBJSeP<$juphiK=@pmu#PcMswGQws*zb{mzCKZ zw;8XRtE=hI3lR3>jc_^^%Nd zI(9S7;V_6ZH&&SsB?%GR6$nF>lap4+T7A4$G;DS89L6qTg@$HkVtRo7OhhyfjPKRP z1{)UoDe@j2tl1&^9(?M~_1m|NhXTp6T&)*d+MziHcg5Tk#)c4Z?uED?UVBF*3f6VM zFc`got|@6SgFvQv_lu(sH}7Y=uDe-f&_c(6h1muL;4fs9Kv8+9tc(DD;^8$MP-gEA zvXRo}$0E@3kTZDDfde63fE6r>h3j3 z)~;=F0hS9&P3MP-JW_$&)5t;e< z$bj*kJ71;15}uw8*lR>bJ$!uvpS;xXWoz~Q?1&zgfq|%p139E?2L|U_p|avrc_51t z%oVCGQd|=!emOsbi=$KTLjV6T^&QY$_wV~3TVxfX%tW%IWQ&N9J(C?WlI*=QBO}=> zWQ6RMy~!?nZz^PEumAn-`Sv^i&UwyrD!s?+{Tlau-Pd*9t8TT&zWjSw#|HT5nj2ka!0t3m+r9GYX80u01nZw$j-I#YL*?zAj7q&8SETmeZTjQx{M8-P^*# zeNY^RAt(RA<~Pt}r9Cl1p!L=_t(C4CTZwN1ait_A_~cVWprmwUFkVW@=s^gzvsNJ8 z(q9SI+HY@bqk}9J&e+yaYVF&Ozr_c0NA52gc#A@$ez2mbgJO;`1mnv#Gs3W?=MJxJ(A0wLTFc^*B+gVs@#*lP4_SzbnK1 zVn#>r{UAf8(OWPj1k^LY({I62%HPzKXt^mN;nuW1aSw!@VNj(R(QHWWTS%q{RiAOqHg zf_NCP|2-*t-o3x6tY!6z2=9a>?J7GOlP@tl9_&#UF1OtxIJ;$gent(r$rWIfDLHim zDcp9MP2ZiL-{nvEJCsU(Y%B$ozuuVbI0-=^Qjm@9Jx;5gy}f~nNv~P*eXIVo*_d;X zn!XZ=W)(@%TXc)MfaijWVD>Ysu2LU5NUU_)Tf5K}Mys+o!xRRkd8ph2vrVSgC?LQS z)bAn6S5H{i20tytih71Nk&AHvbtq8uOf!`j{&*nES}ys7I$w{7tvm&)x1jGT^f(oO z_|E`F9dM*8=;-p~6ABa4b?$E;Mb;_7wb&rmV^TN^B@0(K*|+lSPCB-YXcRSu?E!&q zVKpqdg=E&aA75Q)dwXAh*4S@0h1-3Lace1a|uFI29-$kwm#mUMKN5GUImG*wa8B0Hlae3`NxJpNb0v zic3mhq4gcC7KjF4JiE2?Q-u$Z<-mI{b!~sqf`Af$@+W4S1%)6?2K^iO;0bAr#pL>k z38E^UZhH;90_kU7t<~ztOD`6+r({Q4PC^jXy-M!OQkVm$iLO#=DypuUz+ONq3r+h7 zqc#p!ae=gH;gPA#1QHl0SOFrL)FnH4?7t?u{nNvDM7`)%jnD6 zi3Bg~&gUYSB_@xJ+;79@76Dp29%n%t*S(7S<~g{Cu}6`S6fvnC?fu_s8vyc?oShw$ za-${OweUv6@Sg=WF4`h?IIL21ILh-f6hY`L8_jnA$&<57qp1(1YfHec(*@>;N z*Cd@~ipc4=Tn)X-Ds@Olu^~^;AK~!KbSag%-q_?L zxCT)d=GC9VUQAL~H#MkhqiOv}x6wEYL@SWrs7ACiLg5DVP!cvamu!KTdKbcVtd|?Z z*4EZu@G3yjE^^$E7jhxRq`TK;HP`fIwDy?J^2Pcas`KroNlRmDYHEn3Uc(ODG*yco z^7b(?+!yBS+-zfQ$JeiD;R#r^79$RFNjXAYcX@a&^O(vTU}=C)0obxImsR8a0h1w>@R#CDkzy=ShLETrVN$gNbyO%)Wqdi!hKaLs&e7A;bK7at8j)o~Sprl& 
zxJ2iHKp31KEozqIa)$~KsDj%2QyH|3tj0^DzUWxrasZ|PESaY3o-YL*ERC_2kh-ng z0W*^2goer!<6=VG2Ze4|Ol;jYpdK`LOS1KXvxbG1Xf-od%*~O485ClW0{lC0{CmB) zcw@58{W6HbB@GSdJ&l5kOnSK2CY|pujXs6U#(%tQbrxtLD6fQj1@v$nEGaL*&(hyj zYbgaM0xh|hzdM^(9vcA6fPEKIdwkd{Wg1&vP-T4KbnUZrs3xRHylwH3X!Rt2;N06<8w0-X~)y z31FS#Dv=wbE{O+Tkp^CEif97a$??(ZMxk*RA=rO4LHGiyH7EulLtJRkY$BI*bUAV= zCOX>R4}XP)+I7C=Yo29wf4?JgSmgH2cy1WEYwc`qKHC%g?h8^y0HW#}{A+75>W({1!s#&HV26@P}_kA>u~5x5n@L zYiNoutQPhdIN%I{I6BgpEF1vEr2C^2g7@i&8>XkB@=e%K=N>S(G*l_k2mVgf5bFB- zKZI)GWiWsvM=B>rN7xX>`haRT|Z zB}ByjK;Va<{#%W+Y@XIaz%7-`nCH{uA98bp$4XWVdajBh+OqO;hyHI+J_GxB45Oxi1w|p>zRMBtjDr4 zgdVKSTH`<}Y`}CxeGtRoT$^yjgqpnsq-!4vUZRYQE<^lz$-_fiQ}(@N8mXUG5ck7} zZJI@f>>lUtl*q0H`w%WBIug1dp)O<>CdOhZqMqd)axDbch6{Z>r>9?lLy7@7n~+3^S}`mxYiZDGCJ#9+BpP*xZ}y$F*o?L;I_3f zaq;qsHT&yo7V6&yze_Q-&iebxaTmXS8Vd}Rm6CDc#X85mg@*Tc)jX}TqoEb`zm1?H zh@hI);bm&$>wpq&aWL3SwTsKDt3zAv)58==>6@%T_GWS==?+14^#~Xo@Xf&!7Ow4c zmIOf4-E3)@ThfAi}4^SZnaV*_BjLii_q zk$^xgG$|^|&-0AGyA&)mi`)*M#DYX_J%wEQ>jYoz4NxU^92IMZ!|=lW-hikkmqXP3 zh(aM*Nco4h)1z^ap#}Fmq~?Bx2|lXo^Ug7uBn)jJQcl~ShIK3fV;5Jh%3{nNjpuT_ zch};)5Yn3LNzR_hNpvt(Gnpd4dbP>^R|=dDSIIe(O+oJFpjnjmV<)VA0qhyHz<|o+ zH$E<3y3OJK;HQD$sga(E>btoz7BVt;KwQf1=VW1drxy6ZKySkS zA{opcEG^$8gsNF+*(KHVHe*K9|Q_{i6Yv~Rzd6M99 zu}BwQa%3k!5OaI$2?#({&UN?hX&ygB=*&+)0EJiT%cz2ao*r$k#!Iir$dd2v1Ox;? z5nxg0I+|}qm7zDpgEbJ^U!<(as9qBA-#2SBMd?Jqu2QDgEyw?|QZAx{l zq{-g&eUB0Mj*ZG?hjW^#{z0hRZnA^~Nmx6E6Ke5`p6(wzyEy}`%MZ+V{ZjLnS%{(s zMbD`Wt?)6A5`%?8uHGdCvo;FDSHEKx7uv%L>(-XH{^Y#|7`ri)`b$@Ni{WjMpfL{T zjd_8PnMZaJHQP>WYHrC{6F{`Fus^d>#0}@3SwZ2wNM1kRn&v#M0ZrGTAs8?Crb9jeqTw^I+-6LW*g3wGJK8~Y>&a8#wck5 zaDG4lirYuapE3dk=vF9C zZvIrM?5z7}`0neX^doYHyALi>1_+y6_=MZSuYBW8aYXDpKZ$Ri=NZrF`R`tnie|0X zOn(G8;zugJKg)VTfduMP5!p*(mRwFk`f^FB#sZ*by)&Hoi{R>=#iwt19r@$Q0kFcM zb2N(v_{lob2jfk24C8kr8D$)B}PNBDQiiD)L96Zv^!XK$b? zD^+hfyv+FyfVZ|>O79uB36M8hgEh?A7L=&Lz|z!e6?SoVS6caY;IL{?qS*L&QN3NG zZf;fjQ&nn-cyy$L)%Pm!N~Whj8_Wm8 zJYg&ABk@@vT~BNW>nQ5WD4=vZZ7-6nd`RL3&o)Uwv4%wQZ>&TBsLaQgonO_xw6Vhv zERt4C6aWL@eFye?ILzy=bK)MU#K6Qa>wVn>3OpcvNvPk~qw8;qy#yIsmyZ`ap4kL)w-9Hj%!aAT zIX7A`b;P{$!&JKRPPv$_v%j}~;Egyy)}7u&w+xJh{bW{ax$^R|ykHwI;x@+P@LouJ zNI=AdiiKzoK~;E6AOXMad3GJx?><`U8rDQY5S4ewXSmUd+3VE0TnN5W#Dh;2x@-N9 z3-I~#=dE_yYg(=*W&?-?VHf9sRvaG^o7K8tWO07Vpue$5U^_83=C`wv30Mgnye4m? 
zm{qLnx!m2|hrdtEQ(Q~<0Doc5Nw!qw`5`}_Lr6#{?t`?Q-Hq=91NxSU$CLOP;(OcX z9nOSMO=Q+UyKlO}l6XXU^=i>LKYy!+vN|JB7vZo4Q&mdVoyQ4y$)3HvYJ50C+k4yd zO|5>b8!<_0+I&t?aESKzpSZ7BTCNR_i{`}MG!LcU$oLt8;MOa=N>tS*sYNNC z>ZowmI5~ePh9Vg~93ri|t0RLnuUBWA4s z9ySHh+>};a;xAnMW3LpC_z?KRWf3TmNCrn2^}a2BW@VKhenFcQwh2DLYA)B#WvrUA z)XivI`__-@M!mz0Z1ix1yPjD>f0uJFtK{g%I$hr+|7K2|X( zB_v3e6(^(5FBoa9t7L^P0NlYy!+HC5Y6qije*O@b*>}}E?e^*O(7qL;MxWM{+7S{I zm53WLghxhLcj4R3t?b8$N6q^n5_{t=kCi_>s%o_CU_UOaS-f~p-6Rfn@8P_y2T20I zOx4W9A-~N%2oUhrqY1;&7|D^@f71m9UG*Wo)6-&~Ig9i2e_^3Me}2Jd`WfA;q=d)E zXL|xFM48WmD0BiZ^{&$z7g_j>1&94ir-XClCPrMMHQ&BTSG;`lqlr_Y^qCOzEfx1oJ72~Tc$Oyo}mmccM0qC zu+HDjm_j+1(@ALd@uhV%P36u#RGzWg<=4WM?AmOPuSb}~2nI>CZuAV+pXU8KdO^}$Ysu!eD0sm=dnVoJ_sMMR zS`D<7&yLu#f?b*@#CTA{bkvB z-+sM$`xfzFdUd!h=~o}a&Bz}nvv9)s2{vSIwEJJ4hRVkDvXau?s%%dU2rvzLW0xrC zcoodGjE!&OVIBdwPU{Qa8QPnT<{Z-AZa5ByWKe~hral52H<_4HJe4BfQ^!3G9kjcx z42z>P_9=(d6BGGV%eu)ADO~K=#`%Qp4|6JnzIF6{>r|lkCGAoi`Ov-pDfdTSUP$B0 z>V1|d`dg27W2j%elev3skUe%Ovm3H=vf@15dXFI3y3!qATKQq&$tox``(V}2PYXMc z0Znge6LvQ;ZuU~K>k;}xIT?Je(Mh+SL-9)VY?GCoj6yV^&f}+=l6r-Bi9i=48r-F5 z+|A=#d+ues?6n!9rX~@6kOBt2QD(dJP6}d6kQ`_0RC54n4iA8S*k;9pOE7H;%2la; z{dW8$)3~qyIP7x{3pY7pIy2vj9}epOXS9G>IXMlAL&f`BshO=W0G#o6yF`yoW1@fr z+0>kVWxB3s$^jgwQ0l17(_`XcZfkpdHIUI*PV9+-N6c&cdMM}s$1=Ux&{0eA9v6E| z$xHy5MT3*lY z`Efm3_gmSGnNut!q@E$8HD~O@W77Sw-UCB+$rM45!bIPD22E7Hq6WdA`Wm;6@uf?b zxQyCJhhuKiIVw~xb)ENj6dT$@#E+<)^TaVxvk>PG<=$(UuCv@bVa?vmJo;tjrZ;)! zdEY8pV)~Miu_U92=~~~q9^XRtEWh%B+asvX<#SYog!#nzM!l7OpH`R^3;X|*(gQsE zRbH=z1VN}DH9`GH4U-uRA69MIPws)QNdFPlpc3EfGNblh%~ENe#{8!L^4*E@sjHOL zHxT9^7XES}`YL1}P`guy;@WGN0JFcIsHJ|lh62S41+@jJ(|yH6&D6RE!+F^~osCs; zz8!thmN7!Q5EvOxYbYA3W`>6eErt(nPh5L{PfIJ#e3!G*Gatf}HjxRn^+U~r-A}zM z1Tkys${rnyHXltE=@{7)&^;J(fkuCppNFeo-RcsuZKW?N3Wr; zQ;yc9$~Zw-0jWXXGWM%~e!=f+#$E|vqut|<`Sz>jK;jZW(edzLfO%dEkOQOCNz|FZ zby6kE1ChfHpF$#jusbp{*m+}hx|Y1kaijH%rxHM*atV7F0Co3N)r0#sc$}!kVcv+k z4Li1-Y`5eviZyRS8i@wM&!}46huK1P5hTP0ajn; zP3bdTC(+>JyGCOv4b|QuBWM|tv1kSKz`A zjs-zLYm0fbC#X?Wlv6>>oUKnh+SaC?itaeaJF=#s)-{gr$plc8`03H)n+IykeZHN( z@Okb$d-iC1e^K_U_;*i9zEU->2LbJ`Gt3$->rT$EUHidOV;W746zjv4)vFzJ)U~I6 zxhN?`^nd&2{rR&_kt;_M->VQcl2X8iH0w@?fCp`dfQKMF|7k?USBL{incAT(NRWWx ziwS5z$#2@b=2UgZR?2a1h2XdYMYl(C(NZ*ko`KlXdt6pcHX+OO#bj;5aqDgP!~k`J z$Fh=hq9U^@r7GAu5vtsfV7me=l##?eA&6A*8dcOFXB2<%KruHutFsP)|Leh7@aF1< za%Iy3*y8Zxpzt+_^X2eNn18OW8g0!-;d!$fi?|!&M3-O5s2$SyY`yT% zN^${whvyPrGR*>UBk=88gQHhEX9pY0-qU{XU;MAz)L||US7Ep z_mdo$Pp+c-7XPa9)8mYbo8Y-|iQ7EdmF@LRkKtt9FcmkZi|mSsj(Gjv?)FFS zb+&>VQNa={58g2+M7753pLbiXuX$Qpe&{0Hj=R>-){VN7iN49tr276Q*~>QIsWbw$ zw}d4L8%ucc=}#r5$hoW(Cg|Rx`(yOmwb$6Y>Q5->fRG9?m+3fUu(86fB2GdyCf%_; zM``mL=j7tz)3-vD6d~S$KkLLh`}yO?Y*vZ~xJ`?`f3&Fpe3TJAQx?qghoMx#hdbm^ z)w8rHz{`8g!G|<{;1$p%ve(l2cq?+gI{uzCWZ#|7(Z_!L3Tj)=j4uO$&wtk`{o!NH zF==29&E3fdC?AYz_;)`)hjp*69uK?a3hMjYYuIzP9)OqrJjVW&ru!x2sD5!%5 zh8Xn`~^G5cWqnvK*&v_6q zTpk`ijO+FGj^Q^Y&)IpxFwPL!MK43nz%4@k=#v)d2}A3Cwo1>X7gzbxUMBoH$PBn? 
zLa@^mlB!EF(Q~=>iP~#NVVuDnLo^H9*Lcuz=`WS+2^d9tE|)`@xF_M1kJ=HmV9US0 zfcZaws7>z}AgzGXyOGXYT2YVdB_k;aa(?c?y`w2FPk4n>S8=UQ2vl+4;O3gyv$6yA zj*TCn>dSJD@rO1=Jd{$-KojnEv08czd4?wC102FNF>!IXqIN`Yd449$(rz{O{Te`Y za>w0qk&wH-C~61b|KRwg5`Q^2^q>f9Qr>JTE-o&7F>xLXyR}BgA~>pn-ZCk6FPNMY z;ZOo%m0M$<0e|GgvRc7Q?( zPh{vV85s&GXL)7vq9upHbXk3lQfqjJNKE&&rWbN_w+!QlsSHzScdUx4d>nepH z7mU)yqS|%p+`PZ~++2E{2)jGQ7{9UvFA-Nrq{i-_)+m z%&$#ZPr8x?^h=YxQ@N|>J~F9^x_6W3&Q9rrkt_KW-!hmys5EJ9AM!T&irC*StQ8cv zZ33Ko0V-KafO;|D0M zXh0K74`7{Xy89Ch5Laa_p8pWEoXHm=wp)1kK|FK({jGs05BmOS!0K34^{4>b z1?N3&-plhH-Q83V{1-KzKYmOGR%o;!f%sVJ2_=Z--qW}N8h6Ne4xs!K3rgF@wK376 zy%|<%2yMTuHlZ6{MCMn79aK}?jLgK7%V<+QhbNgq*i9pR#I+8@F{GpohX?Z;pe2AcRLgjC5d=WlVDk9=4A6(}^)z&Wp`0&8 z09baZr}{KdVFOu{E;+FTC`nc>jTtgyPfS_hOW zXfveOxt=U983ce`jdML|+-_2MxL1m2a;E@M0Kv6ScKlT_8b<1LAYrxzV%Mt(QP;j? z>Pl;GZU1So6+u`Gr4=tG^1{uu`+>=WFgdb{9{r5$S|T=9oDEyNGAn(_cuvQc*io-# zwtGEnlzJ0;l~A;?^E9mR^badl{TbRwio`-|z>St0#&lB6Lyo(<2iC>6kx6Cue9_l? zeTpm9k-D35j6#&q+$%yysQ+^YYy3$0>*`4U?#F*vCtZ(Mvfbz(L#!A8Ar^3pl~4W* zJX`Ksgc4r!YaB&qs z1TlgJ^LFFH7KmMiz`B+SSg9zGFX%eCRzNl7&|3`Hc%J8<^Nc3SR=ogDHeV)J5*Kth zX+6563s(a-(6WoBEmN_#XFwsCAUDf?W04)Etq+(&OzTP}A?u2dB3q07+I;JKdz1@I?I9f4*A4H8K9-!ag)d==!mvdBKVbMuJ=M99A2R4Z&WR}Gs z0R#k^oF9hgTIM?^#r_9?o#Vq$NXds%xYcp|+1>r?apL5}eT)KWGzd6fa#>9}A*ePy zX$O$0zS-}nSXfl(oWy92mmB8*i9Aqy(L_+e9&EFrZU*fpv?3%^Me*4~^Z*#+WEQaDOT?^|Ui!xGMGuogqm z^4eO472(ruL}k|4NJLMHSM;C2HNT?{s31^MS1FRD2QHm&U?47dRM*2pSJDa}IYzjC zxPXb+oF>|H{P<~>^(K!OsT;$@Y9q}7{>5KH8x^w$zf+mH75rX~PHM`zuN-nIL-@UpPU_8T!#NtirNME_zZ_~?740$ zdB|U*pa1}d02=iC1mW?N(Oux>f!Rb0@VMz|sjO2`1BP1!lQ6==TT)%Q=QCAbr+`WGwQ_kB>b&(gRRH^{sX# zfW`%dzgLtsuY)>0Oa8R0iWBOeI#s2t-=3bFlo#%$3}(WVY#bfMyk~`Jq52TF-8XgHxNseG~Xlz zv@alZ*h&v!Q58k)sMdNrI3vyC_@z@D^fH(tI>O&~5^ge{?4Pq0IpAB5lIE0ZE8$ze z$N8hp0Q{=$t*zGOo(aUTSpF0X9SvH+gaOZiGc>s} zarg*%_vPpAYdb?DgwJ?d5yf<{Q(8_{U4t@)inCg=#Rv^>;OJM1-om+I(0m1UAvt5U zfoS$xr1(6%t(^}R1T;u7*>ac~IL`>_Vr{g2O-$?QjB!`A0ek>rTvFEDD~q$Uf$%8( z09p;7qI)0prPpwS(9fUXTqXOeE~&7vF!bS}nkZj?2=?S?Ae7`|>dzy9$A#e33Rp}* z17!Ok`T!19t+iu8s^8eH97v$cU(UP+5#+00y7j?NJV?U~z@l6CY^|*uqS*xz&)#0| zKx8eh4(nuVC=~rN7n|`o7S2@^7sC?8L1qV(mC45{xWEhgYgl{NPY| z*9X8MG|_r?b22$f_}t)4qw?&7a2S~U0*iwm{<5*Eo?C~MB@8Ix%IDg@5|}&FX7w1u zD_$-fGn3HNgg$7fwO*^=yy791VyLBy7TR!*XYI>$om)g_p;0sgi_c}aEpP4+{_mpE ztg%ZqtJH|hg7;{kUwN|pQ|sFKaf&F7FkTaIm67=eWdZt2K@BTAu+TyQC`@~`kO`+& zeyPkdIN50?IP4s{!HT%Qw7dvWA(EFe$x(E%rSRMT;zKOAp>7KJdH-7EkXKDe_8YKn zz{J18)&@%ZSKuBGzI76IcBId^DS*>4^1z4N}4|amgngkwe|D z9ax1OUrCzB$FX5CApPCo1{(9mBmx2gVDo|P=I%P%666VbSYn965Y!|x5+6|(gV`H8}M{C|6p7jKG|z zYIykt5WN6Ym`6&F@b96N2t?qeU_=lV^0GD`Q}ciYR*Wey~}>GzK@6B84= ztRF4~FzFV+Zv0d05`eR}HZ{-yroE9>6tl*3R-vHxB-4SV`Wo&*-mLEQw3~V+cK<*| zp0HZ{5a;PW=aB{vXtKmhAA+75$UQ*gq*lwVD$be7*#>>aaygJv@ymmdhLU(wSwNy!Kjv9tS8rmpn29sv80T5j}Bm9m5Jvim@Ct{FW|11%!h zD%r;IhP4~zBwrH8{WJ=<<3eiywE(zw-vqvd>E-QZ2*Xhj_h3>f&^^~K%d1Q;v^ZfF&6s4-hCW-L+dwSlMB0LPjor$MSeKIhN5sXY7T5pf1f5+4_8WRi-$>I%5TI=lv$I~n8w6gG$L8iH z^S4n%aYq9TXt_i_5t7G&eRQN)~B=`7n zm&-UzzqwZz°IzM!@3i3s->t4Rq}I$|i$Og(VYWPTTUIU*_Qf{u=GY@Bw(SO&|l z;g=UTctr{eOT}HvseX>dC3>nWjRof2r&qqes+)BZvkAl)q*JPGFsvr8ggB6)+4mV3P zK#ykMZ@~K~eFm`Yro8^vd@E>|*~@2uBn$w6B?SeAHBI%$`Gw#V{7+8*f06ZYh+ACx zP^>^poaKs3k2)c2pRbzao<&E_>KV5hAKJ~`H+mGUKu-3&+qE|HqcOvp3U>?XK`yb+ zbj!kIt#!wRRmCEolrM2K$lXcTa$iwCr+km5`N(p#(3|W;r9DkDxX|<5 zBkB}Hx!{Bb!iVXGzP_-5YklCfSLCv5F24gEbm)*S=#T{hA=X;JpBT0Z*!ZquokP>d zuB@H9cVrjwOk@^OL>=AHikk)KpbEyVBoCdf=-fZ%;q7m2ApD(euE0v)v65QzO@@w{ z85eq;GJh)qMY%rKIp7hup}##aL@$7lp*ce~2FS&72-DV`<7uTApwu~3$G~U?nlQn@ z@W4PbGz1QVMLqv>APZ7n0iYA$f<7;^4+wt1x;eQdO#b%)#RIS7ovF#&bkIkxoZXE0 
z8qN?U#ujm1h0U8UfaO>n^An}v+65<-XPbRr$8l%&avn5_rje9lR{C^xblgu7aVMss z$={64VQhe^5YQWVxVdfa=^@rX-WWL07c{^B9&}2Fij#z!n~>L}{`6RhDReo@u>*Fl z<-ySrF!HXVJ`@xXK{I0D#>@Q_f}Y#2!^4S}>%dqn-E1JURcNR0fvguR?UQ&3=~nT7 zU-~I%DYj$dD18djF+Ry*M+6?z1W8n1pZ{)sV9xxV+xET4bpO;<;kq0dHMe@=aZPkN zQ)R3QXu_J&D9Qo-*5jbE0SUaS*(MPR)DixGNr*|^+hOCMNa*dR&G( z5w$?eWH~e;h3Wr>x*8fo1Am=wfDjL_3B(n_!vE%0q8x|RQ*MMmtng8pvUmD0aG*j| zJm=$RKtvtKL;J1!JL7w*Ypt>Z8M_tG~~;J$7)Ox&&Tjvs5J zyE+<^cV2F7KCb!RPW|~USDnTSRogoQ+ua>8KImR#gY&JyB$qE^prC1FJDk9S0%ri| zV|5lF!XbJT*(>fH`6Dc_3veDl8#iVtCu@|loFjF-;JG~L|?haR3f z@oHi-yhEu{$EOj>4^1_(B0*OOS>9VSXtzWIR7OSDM4#hTP#rdc-|tQlcn=v^frybA z^QVa0LBr(omQ&u@uaKmqq@HK|NCW|Qdh@VRRJ@b?lbpY;WAm$R_1N#HXBT(vZ>N>u zJQ>b7JqvVDL>2nPiF%5>d-o2tc;)xOTmY@i9eFpmvpNiDrEE@ZVPw=i2|Z^ZOx72s zAt!G~W{fJU;J4x7=%LKN5`DN;s}Y`uB1};mdSMYUt%`HnD zpOUP9V_&^M{2P2?dP%21g3t`DJBM_S@E1*n!77ty-RJoCE2YYMkmc3Z3J>(+D@H%F zOZD(SpL`@K1rrL z{H_G=6bS5u?-{ajd%hPggH$}}(pxnySYaSCH>ZC2?3wDC6SRcP;4RP8d~#JO-kvc% zz1isOrE$izvlQ+(5tp7z%0>Mai zTurJkm(YV$GdC&1aQ-}3qh7ztKj5gO%mn8%8iZgVkHUez>?kjg_|F3H#JT0f&%f3q zwfkrAxZNk8--5Tbv%u>irr6h*^JhOt%j8W92R;qtXiPJYW}#Z?Ew*b z%LN{~-U#o42ZDkml0useBdH^~ywb)ti8Ul|R>Q7GiCBFs2>v!@k*==B%H9_e?na8r zL+x$Mgb6zSMs*Nmf7UD#UvGFK<`?Xc?Yb+L)i28RjC*hBt5q0p8SlP* zoAZWPkt60o=Cep0IP(5&gwdSNah`EwrE4+F-{EI)Xcj-f?LV3>>u-F~zztHYbB{_d zBWc^q!?3V@AT9gzn4SJ9Oszz~BbvduEOw7b^u!Amp8IgDn~31?<0(4rIh5g?J1%t6 zieq~K453?)D&z=8;I(#kQn-l_amP_TdX!OOmq?t(|0kaDqhacZP1mMMn@?>{}Sx)3P*IGUrCuNVfB1?*WSr>5?5BfCwM+SzhCn1h;Ar-X+$n8SHn0t$w5MjFA} zw~PL5Ylq*F1-VVnqwRwcGfhT$Qc}j%P$6ugvKjVX7NQP$of@YIwa<<{@ptoTQD2aL z)4A@4+s5E5hC)F!3BAFHP=RmZR%-8kPJviS7ELNBD995R3=JH?=;<{i^jSqlhK8{d zP)Y#)gL%)}zk?RuDZv!Uz#He|>HEHH^y5XUs!Qkw@!O|oLb*>qD6IbKA2~izbM<|X z-HAF82a^%53MiP879YL24vOw8SK3O!dr=GaPv{2R>8!gkQ*UZjU0K=C(Gj>)^xtPu zWkOB_H>WN73xd6M9NY&ZsS%g4%N(<2X>W}%qYnI_RWVxb>Dm~Nd^-e6gp`zrWOlVR zHFtO&pa&_kSwb>s+3}O_zfF?m8nPMhJ>%x*5Dsa8LMek(n&#RAy|A#?NR_3XQuCYaZ7bYWwsK@%Dl&xiW@`b7(<`;knK z^`AfQP~{|qPQ-A~tA)CQe^|$}!07s7ExdiNf4dv^79}!J`~m`L(4JlH7#vrC2g0#OQKT6H_q+W9x<~+ot(uy0pH6>W z)M<5?efaxy8h|QcUtTNE^xc zp){@?*F=x%ulMxq^?UI5)WQ995m`55uDiD>&rEfJv#tnj$AFR?D+E1GB zm9Rjc!uo5Zpbs$_@_`Uc?1rJ<5+oLF&>(#a;!2+VFI82=K5l-_&d!Y^BQa*8u6q~Z zK>RG;Y5=Plfbh%Unri%;2UL-;y(rGFNdCROv?N52;+@fut47UAL%NUy3)efD(BIBZ zN|1AW18C)?IJ4w(Qc^h3$O*u|=Gdiru;^=tkJ=B?U65kYLpNtAvjR`+S?IvX2y}9_ zpeM$CL+v@&3Fw!)Hb}<<8FV zj++R!gXg5Ta7v<_%OqP+V9)@&r`>)PwC1=7EMgzDeC6q7~lcOxacpcaX zmsmCO6QfqaMi9)%&>xR{Tf*l#Vd(UFF4r9FnqZx0PxtfH%Wu}T_1r|Or%E0M|Fe~Q z&vfxTkUPtf%jbpQ>Ni1(M=mw_ok0@?IetS;*}mIEDMSC+ri!Dtn^QVA-yFoTw^?N7R#Wi84OVuL3q+OmyOH3BK zh}7MgNkD#{{QrKw1+L4Jmg66@$2<2UXmR&!l91>3lWO86UF~?OhwnWu{?FOY*Kmu9s+~)uNoEY+RoSEiMKR=sL znf+97nKY)&-@lg?3qQqm``^{*|6Xk+On;&m+m6ULk|sq@$yab>uqm^{jD}KaPyc_z zdkYoiWlYfg>esXZ6j1C|?>|%m1A6o?oRnBgxQ^8`@lPe6pz($@%kD)>KIK z@NjC{Xfh16F(i{Ng0`v?(qso zTa)<@ebcr+wrjAlx7x>a`+aSJ(fss(V~^(VDe=>Q0-KQPXPd=F88S2cWi70~&42$x z6v`a%?_*xYLEb#D-TuU04`0OWhpbCY2h&2X#z<;acLa>(lE}ZYG}1qidQbDZks^6Y zD=hT(uT!;{@vR|?Uy9FovYeldiFtWlsz496^f9>D_x{es`=Yyg7!3Oc6#adCF!6p+ zd=VYCXBPc;TNxkZYnR#)&VA7#z* z+eqf-DGV5T-Kd(|LJa>myX>yO87jmNTu9Jr zNR3JPzdgnd4+eH@g{Wt{V5#jXpEZZWqt2F)~Wstam>R7T?PGzrEb^ zOb+MY<1puK7QE#0!H}v-{V9+mOP&4M?uy%}_|I2q7Px)u<`Q&yOu`;>X_qFg&EOp| zP`O&-LBVr+`7)-Xu`K_)i0bBGB`iDxU(b}3AMHEU372*WM1^>3FiQqMf5`9jtCG2C z-Wg0M>qp(t-+es0LsL_Hj%g?Ss8&?0!9I7{u=SlXu33&~%;)ZR7cyup8qWp$yZ;S4 z*5~l1dU(1b_w%5S?Jn5Ye^3U_PW4sM#bFM2{m+YX1JnaAeK2o^cU@o=!C}2Zva|BT}W` zWsNs%_Sk!b{tZSr9^*+cOz^GGOjK6CQ?~E=i?fJ)+GxJH>yGk&kT9MzcfYV|kWj^Y z#3Frhyb5D<&!ibPJdSMEHiDR*1smIooZcM!n)r2l^gT5HE@}-I#h09SGBFi?QjPH_ 
zOixiGMDwZCz$4M?x7n9g9scD9jsq5FLc|utsvR!Dn#EQ#BO>7C=!la!>|6A?ZAF^K|dqb%Z(Yf0h?%8JBt3QY7 z>7f_Ut*1J#!aux-`ml;E;PU$UkInQ{BjR;oF^Y8CgpY==sN0|5!10Gd&E6_bgnPG~ zyt&S7;<3hdBR+FsX#dI9*|+46?JureKqmMhSuad*!3)HSzto*l`tufeLjL-j;yq0D z^;{Q5J28+F7LPc{HhTA{h1$Zr=wn;bK|C{FHizwps&4PQhG1aCJ_)GCcUP^xr0iCHB_h#~;5Pk(1*S3DM-4Dc?%oqm|NImw5BF19OzyD@PRB zF7?K(H1h8Vab3g^@gyanZZ^=jT_GNHGC%kCM=N!}=^v+V$PW|HFN-bwU|3mVt&?$C zp4X$}+Ip>+<%CvPsv>ORD?=`?29|bUi@azRfF;R0jq#r5f1iMPgG}bYaaQ_X`G#o3 zAI+ixA6H&B|E%XWB?Xvb_lso3iewu*Z97Ru?2vnn&PhF}rcJ6->>R<9x(l;S;m(a) z_N+m_n|7A@=w$P}qk~06ri1H$TTqrv&-jxKX~H=;d@T4NVyHVV{Jr|Wn--9XADB{L z>w4A`YZnxrm#q(|k~% z(jxp9oWzq<8GOhRz1V8$)6CR-!zPa0SeM?{bE`5`v~K8qteRsvXV3oZ@DM&-&-d@L z)oN|d_N{yc`4s*JAF4%#Cgnc56W^>te`M;%nEn$xUY;=Z=OXbmYPPVmxdbCOG5fQ1 zla+B&spX}MxlUFZ7*DET=V#s5+$t->zbU+MXXcj4g^Y{m%ozXe3*SO0F18SdwvvRl z-X;!g%#mg;?ks$A5FN>#PTW+lcq7!*VK{8eZm!t2tH?vre%$2y1s^Y)0)i#oYaDvK zIOb0u*qWl=X8pq1BYdb_r%snQum6CSkoON7isj7<_$anrR9tZfQvPy3e-Ic>BoJ(d zeWO~QJfxX)Yjuk?=J+WRsl}bz-4pluJ*C^dhe${VCv$hbJVK&I^2QXFsxyU}@JEW5*<_M0-cLi$%+6s;)PsnUkaRxhNhO9qSu@ zvA&U+;r2Pwoj!35e{TIjQaB^Y&I&1GG`?%n6@L~po5EBgPv70xe~yC;q(NZO$>=*CmFE~xmg;o%tqb3P= zJ}otIuw^s*7%p3M%MI+#2}B+f-7Mhw6#KZ+=rQ}RoTp|qT6;V}*E@xIr4!3!C|fin z{|1`T_cbN?cm;gd3mVRgG$iN9=56yz%KSjHS#0XpTi&zq5A3zuU^bWJLBRGP3vHB%{m-g%C+bWJP3$Y_dYKsci1^?z*n~ z{@%a)cRh6dQ$C;f`!&vap67X#vFn1Bo zp_7Blbb&sU3Xj#qIHO0FDdJ+n@A+1X24d{z#a|!Qb?)`^jf&VBk{1bEgPb^G+O^C~cRMe)&A;mX zAx!xEwM10)1`|Xs7@r(IhQ!QSGrQ~X)~+q-pDORD*qGq;ua$PpQvE(ftQ1d){w5za z2IlQ)UA=uLdHtZxw}_p!?ZU+zu;Zzebk%+CKsq;DDw~n#ckztS!s}qMK8MXD;Cf9@ zaQz5xm|AB-f>#LB^RZX(Gn?ev_1sSUo^MFBHS?7txvM5zq(jaxUNS1(i>PN!ttJ1e zBaSv$Z2PdojC*%s|6Rq!K%aWH6$dSpUi}CByIG=)v~2#_5V&F7OxyYX#3T0ZxJqqKaPl&j2lXy#{2L3t{Jbt4AL;=O>04KUP1x zU@_uh1anb%?(&+d)w_`E_g*)MOx~fL0p%d<&kNf+>6h({LoNEjZq;x^;O>sq(y2WzD-}B z`iMK0Q^40pH5c94r5qF0YS)W#^dwV*cfH6f!|v2v8){{u(Q<5*Llz2s6YK{zv(V_i zg?L{!g`TBQYoC5FOv!9Tkn0=&K>I<9PwF^(4ti~i;X_O@n8ml(ia0A4iU~ML;_YlV zj`Pw#TUv`=#D$)idVJE7BLp{4@K2)0gLT+EqRjvFrw}MDc$sOi&`dz6$mMbY{x?@z z?b4Za-pWzraqe3CKSAIfb1K}!q_51~xWxsnqU1MZY;cyU{D{s7TboxmYv!f}>dN|f zTxU1j30ys%>Q}!_Q`5V%(2_)RF=(zX_5yhI>Z5_LD@v+rjdXt;0L}cy9vC5O%hphr z6Ucn?{G&G~vD?S>&y8QI>t*5)S4i!{o4wlp;4W{_CkQeY5>>%Ik7VKby?V1HAUrF)0T3}=IW7affnP)Tj$uPfnV z&bo8fTG~+y8z~uRIM=4%I${z(1Z#8UueFicKC`PBO8UVo8K_RuL+zO70v0H6YEsy` zp{azos5ygx>w5kJ%=uTi!Ux~zB6dPesl4}TQ*1G%q~y~XFnjsSM~RY^!wOcm4nooy zh)C3e_8c~%?Zqd2L--&2I??0cz2PFI%c2moN^i8;y$+CD+X2GQ>W%MbWvM(|9fz?c z)NPi*nhJ^*9mF&exC3A2Iy7zXFA^)DW!2JIDh$0Id0MAF5`U(fra+QW*1CByLCl&8 z*Tes7aKkwbcK=d+QkT^y1aI83ZQ9$D%N1_Oz8`IW^jVnYwo0t^iSHIYF|urGpO+K9 zTkfAD?8hgBnO`uivoq|k-@ne9(<+lA6T;&}7s=N-=yL;AW}*|S^7N^dsbxx>T4F`% zNUT+rF;zrxb}$}|IPSpMhLXxC@%+8}{b(c6FH_gUmijKQ9lxRXP`n)$Tt%TS=CD&5 zCqo*$0EAOvMq1pG`#tB;iEszYM9jZ_BO)+G>t8okE0-uHH{eweDZuiz7I)@x#Az-E zoG;+MICumy!YG6}gb~Mq7Wm~Jur~=fQ-Vp;nGWD4RAf%4yvRs9e z=8fR3Hp=U4V4yVfFGkY%qk92>a*Jb{%6j2TVVq1H5)EQ+)fI9L)Bg9(&>gfExHmo-@r^|Wm5Y)<5Ln3>4{6tC4HMSH15E5)DQ!tP;=gSBC85jG_+XRsNS*f^ye zI58xYStTkso_b_2{&VuA$Y~F26GOe8+PzSmfUn5;kpzL7DB;>+5xuXpvhk*dw9e)x z-N=zjdbeZnkYnF3{-}a5HQ6Gjiu1QlJKbC!GH1|zt9*|{Ge0gYI;=OX%$7DM4tn=5 z+5f*V-?-*+H$H)^_V5?K9c`gR4;erC&eTP91>&GCbBUkDfT7*;>b_#2$!Fc}r=+6vx1*)=jP;cDALM&MNIzl@qi#?R<>Q2eVx01;t1VYjen?drb&Kbz4cjmrut!r zVX*{BECO_xxs7H&Nt}}ym}b1~V+=u6P5CRUG0F4bDemm`c-XUxlVQo`>3>A=pYf|Q zb@6<#sWf3K&GY2{&HN`^_Ngkz+fHYWa8WLP`aOrnh``*anAbqXjyyY_S>Lg8eX}vo ziA#Wu`r<~vF4dyd9e(oT$FuRH^_bnI*^Jm?{Ecf4;@oe~1I}0YmMfkBP+lsHq3>$e7pFS3Ux!RQ!RckvR& zmPuf- zPs-O*ME|+{XgTd+e&J8l?BP+Bn1xs1&AKW3>Te1u860kZ%)Z%HH6W8}l=#NwgjxIP zm=L_072w9N4-{Wb#;El3C`pzrD0rffL%z0^UlQmx9x-wIiepppC6RE(lPmH7ntfkC 
zRxRyTJ_s$xMB<*5*uIZza>8he1Y!@@KHRP>_-x&d zL!jck_>fCA4^M4$Zm%+Z$nr0Z@{nD0k?r))Wh5lAt$Tzaa+-z)gJdWj5eOEKaZhCK zmP+8@0_^1cQ?4cXq}r^<9GMjLDj`F=n!!Bn!>(0J$cB`FJk`Djp zkp2W0tm^_4KX&bZFvff=_`X-#>_>Qp5LF2Fv-SHb08XqU;~sbRYTw@vqKoNim((Wy??VFl zadn1-;6n;%52U#G8C#q3c}@`YGC%Pnr?c$VSZx3arwU?}Nz@{SnRCZpTKZ5a*!@Ff z7s&!WjTNj1k7XkU0g6%L+?&qp|9s3zWc}99;E@*{w+-<{%1qkE6%?++%WD3tek*;2 zg7BY>#pwdysl%VF;nv!$DHf6IOn7M=JM!9|HO`X3U*qRne&Oh%C!3V9)<(0DJ>}a_ z4m8nYH9#)aSw3oP8N8<+$bN9j=H8ODTNmt-M(fCHpP2szp4Yp<;Th}~w!43*tszk| zz)fP1@eHP{fm@oyitzTr`biwXHHLBs9_N4j_!pEFHUDrpPE3AirEA*w|GTBUEUaA^ zc}&lz?87b1^)$jG|K4wOFf~vSqvQK5saC770R#vkSN$P?t}*v-xc@(Q^6kIJJ3a6< zYZvyv%b*Z%zjY3N-NbU}@2>-E=w+?sKlpOzE$jTFYgKxZqtAj9Nd_zt8m{`Ix%IwM zceRZdHnK@y~p0`u}VownidbZ$5M0s~z&I&U%YTIJWTbAwmIK$$9aJdaDnR@Vz){!1%xqoQK)B9^VTCbqYM9d zT@eh4HVU#aU&i+HO#v$ea^MEqF2PCA`hWMZG`qhqSK)eF?&mG>C&;7N|C6Qaaz*R3 zYNo}@*p8|8;P(`6@lm0-dmi}!I8J70f}pFkz7}5&m!^bx;Hw@FX^auj`hEXojyU>% zU$`kKh215m^^AflrU;np?~41&yGm~se=lsY=`--*HK*PkHvKgCmM|_Zs-WxdSr&YE zMV#PovL;V*>Eb8iSo?GaT&wq$HaM5YEk6(b&<_rFJ`D?eJ&hq*3HR~Gk9M*Ddq=f9 z*-5MzR!gr*F?U)0V;^?34Tl7p+Zlz=TlC^%N3=GTHJTcS1p@xwp8v;s$No?2&78sc zFR}er_X=9@zs~^2T$gxccEPGWy%O`<4FwLG{?H#6#{z2!${B3cd`jD1&Sw9Po#lP| z!dqKGm>$k_kA+n}L!QgOD=9x`OV<*Wv0|pl@Z|!Qe6C(!fAN4jR(aD*44jN)hVrYP zvt?BTW>s4(q-b6ee!5e&>U0ux?wpe-+{iph3o2Z{0FhHBakcnWFBd14N7aO^QUM#7 ze2)53IhB!7593f6FCDqah4L@tMQ`yU=D17f)RYj(2U02MU#5b1UP%g|KE(4b-{?w> zj6XesEiW2-$@(*ebYS1B@Ozq<mtym)a&*rxPUyqw*L%Y3dMhSowe5)8)JW`n5I7i7e7b8*#8N1Y<$G7+O7YC zJoZ8%qYlK~rG9rCQ%i+PF3Qc(&W95{TjDlnt}N)fl&6<^5BE$HW9eWfE)g^$xBCcc z+lh$@XP6B?zfzc^zA|yP&3G1^bNZ`rAtDg<+>VC9cTGTY=QGQ#Yoe#UP645te#;VM z@K;-rGRGKu)1EV73rw(#-{nZ2%rTM#vGAPipBbM>uAG=c9N>9E8WBjfk(II9YgDu~ zMv*?pI6%HNEPW1Y9S9R7ckt%7`Mxz9`u`r@pWY0JyePeoI(LzwO@*%^%y+XvR6s~$ zNci~s6`2<3snFi=2kNqKVuyRw)6bm#so8F)b%KuP@?Hz^qFd`ufwV-CEG~WaKhxgCG}iafFQ*XK%)f=&%3O_YhEF*CwAt>)eklycPVJ zCMpH1)cQOdFC;3pTG*Pe&w`e&U5U{@Wh@Z%quUy1zciHlVeZWphJbxD+YDS(QE6#g zNqHlF<+$X}sc+wmLGX@|tBHr2^{B6~>b~@KcsO3X=KXtW=y!7ys`7*8_%|0*O>RQN zv_Thze6*qGv^%0cK{cCK%tl|yZe~k0Omd@x&0zewx=Lo^d$>|~r7Fz(Z7sIgne+9_ zXvaAziCr+rRD^=47vqTpD){cxKuuJcpCxX*=&rm!isVk&Tj;5IN2>&#XM`n-+S+{E%}XnEj1>e;e-Qas^vR<~ zL~COeEGzFdB)1iq;cs$a`;=Q{P%xuGI^dw&L{73wK~`tj#RfzAXst3Dj$h;N*G_B~ z&QQ-^yugO;V`l`Jy-)Wi`U6jQ1|DUGE%~t z!T#-r{zkHfw6cQ)9Z@Mr={|Nqy`gDafW7@{k*oCamOLS$-!}USoPJe1NzrLHEpv%W zv24$b#wI3WK!3#cN7P~LvD^8bamU7kC6Xhk0MoEjg>?+)Ce|g1uGl_DQ2^ky>0}lC zZZD3PjS26suAwfrM5)(tm*Nhl2vXt%}j-M{Wv(3Lu3jx9OmMne9#gJio7y#2caitt@c@L72qI*Gqnd zCK#BoyJ3~5?JVgEUeEBeJcMwu1h7AH+R$m;`!|_d$Y4k|Fh+&4zK$kG8yLibK}%tD zg^C}Nqg_y>yZ1q5diK~QYz!!=Qd^l&FC>FTp|(y==TLrdQx=SAQGM5!vr@`<@Gzfp z>xaO6WE9oubzwD?tG_^4!>Mp0kG`~87~=NgJtN=71J#L@(Z5Ix-QsyDWhC@K@Slex zuP3jivnIsO!;4rT#IzfMhx@7fJdIkPxR<;yzgHe^L|fDe^Z<<)U+y{dfpKMT?!CTh z5(q_W(d!^rRp^)-0+&WQ8NvjSpy24EoB2Q1)>IEFkiBE28R93mUbx6935mjR8_#R3 z7tR2VZ!~RZgk5N!lujcpQU}v#*r;AJJ+2qhr@)$1{SLW9}sCrd)=vZ+1_2ffX%TM^rWrdG|%1CtU=P8-jdc(l)*BGY8@38_sTkq7yLTsr2 zhxl*)qD|^(`i_;d0>q=mq!OVt8E7Q&TK!cYKY3u0BRdg`W{;stZjxpVemFwh9wZAj7x3&wSQcYvy?6JKAKdr{Y@q+o%D7$nbhX&B`d>eK&l|!3vag6<}ejkT8k~3vSV7+%6RY_(e>zdMS`#Sfs zC71OYb@ZLZcjRNPbW{} zn{kH)V?J{H-ia!ZW#JnEyd#dXM#3nvxFU$tr0oh#P_-Z*gXBx`bBU7)3hGb1RwFhi z@g3Zmkodn0+ejS6+_UjrSXo3dV)PXff_cYcm=Z~N?^q6&!M_!$QtiA(YC-6_`bv3(0Kj8I5~| zj{=Y=9mr~zYMo(}UT0F+dOuN9-+b9dI8VO~#jiviAz+Q7!o%K@Iop--I^OQso(;V? 
zPdl^U9z{7=ndzbWM^TqtiTCvvcmKB2^wAvTA?+^VGi!MeV`H$B&4>{%;(x15CW|Qo zx`jVFyuK$ijN;mhPmr z(~kQm0eqiLr zucwnH*dMRn$tF?62w`dlQIpTkc)3D~lvlrcwi(rrp9D8DU&=fpiB}@W!6PZz%MDFHhNDkl9u3UYA%cEkty&sV;f!bd z9nlsVQoqST6dqbqR)>!=t$LJ&%Mh<#_UOy_at{6A)tDqal+EFU(90$n;cev$mb8I($8La52#%r}=R%;;;FmTlY$57_)~re{a|rS$G`sxLidj9x)2L zk&eW8Y`iTo9)5!6S=o(-oT_eSY&H2nCfq0San@?$o1-iI(acL;52)yRz6p@NRI#%g zC=I+=#@Hzjz@`0UZ$@U`K0)Tx%e=m*$hM8US(b_a)oPA^ zFfo*T`{%Zu-AYRN^ysMQ#@}n4=){4EWk{oVpb~G7$Q2+xtnTBd|C%zh%!sh$B|jzq zI5#P0?E0I}2U;ByWuJ?&m0fgnbj&5F2-WXjf*$2I<_5&r+Pb>x_Z`5@AFz|7 z#}$)z?(yf%cdmCh|7w#RTqx2{s1N>Q-E{c_C)> zg-7^u(FRw?_;Wat_wh4Xgw-sS==d84nW}cI_t-%Zf{hU&hj2g?a!9zVY1rj&Uf?{Bx=F-NgjQ>*Vy zz{Am~_#fwpd+c~qD;jBAJ~`L3$% zd_K0Rx&^z`YR+5AmIRmBn-lLB`Bh>9{{3DX-g>rWdp**{w^`|)vHmA zlMW(K*mR{L@Ds&d@Z8YIs&*;Ep8dlHv`oCYmg7&4s@A?fc~_vpBsViClroMGZrAqk$HY@WWAUq$d7yWNZ0}PP#s)Ki zArAkllO$kWNSl6Ux8R^^HRZ>hHl_b+s0-j-OFQj?U({R0k$#ll@-xpZu+rwAVkl}? z%j&m&GNNGUE>UY=Bpf4{nKi2WfSL3BhL>#?E_P`EriuRF znCA4f{Ur|dsWMREw!vQF$YjnRd#UQjOwwCidI2>#Iq3qZ zbK&%8vjYkoNum`@soJB?o&g%cpf8Fh%mxCNBN@M614_{+lTJcueOcezkmN`n4e8OL`p8x?xSpfpj0;OC!2Zte_8-!-LyN* z)P}Z7Oiu7+&JOy?OkCd^A}cSjgxJ`f^n%r`1J}I<0<_?L!`8}rntwEE-S->D-kG6W z+;J(Gnit+YpR%x6h$V+4ZZKeg9-mSxN-BwBPZEFP;TpnEpO+MbcL|l%%j+FCD`xPYW)W ztsqJ|yb|to=ZKZ5av?>*3!W8ERSB!3@1XD96JDKRDZ!KlzzviMJS17Pz8@PK+y3Eb z)1yqsy7RoL1jsDUgn(ZxsC+D0P``I^mCquOc;MT&?Aiz>y#15Sz=S7UcddV1$!vW3 z^b$f0iTUk2-(H5x0|lLoGsjD!mz5WqUctaQ4hFATj94dzOls6pWMU)+^=fYKz#26pDMieCWPOHO(WQy7MV zL>34e0ZwmDBk+(6rqg1lc3+!Yz`4EG1_{=&6u zj*)-L;XCq$3_d?D*G_M|SjG#131uU*;grU=RQ91-IVGO3RhBXJ^bP}%c(XNpjA}He zok>ykqdeT7xB7w4nCfP05>gh7=5WzR>zhZ)Ckjbrg|Ope0$l)nlRig@HOd&_gIwmZ zLAlmC*KBfjcsC_AH9^eg19gxMK5l63!BQslKUAiQz)((do^8e$bEqCa6&TFR&Bcf$ zq7Ug`4lOQruAeR{D{F^3bZ}IKe$!yiAr5{HMidJROOqVu+O=zXjeZAd7{=ueG+ewkZUY91uZ{R9%S`LhW9(AtOPO(D?_U0{an=nfAz7i_2r_Mbc3g zgX&HHxV1}$g(k@yd2KJr4WG5DYA( z%qaOCXF`cmWhc`uGfW>+sQ_bS-MX@9|DKyJ-eOc~9fZebiHc*d1=nH-BXz_r3?imE zTdWiD%f1=pg47IhR`KI2HD6zwE*vW2jIP`jP~mX7S{nJ3edpB@nZR&duq=75t>lnA zCmx$4?v%nfxBW)nP}RCR-(a0@;2vOo-&L z7?dNSX!2cj{Y6wD5{f29)ibx25k`g**(W`)661cyC8&^SX2$yk==z3HaqGevZ`YWu zD*K=KyiCsHJX_=xy0hs#J#WujG0;Mt5j=|-#1-uZFPi%nM-}vW>;sPe{Qk{W&894; zrq=teeC4;#oU<4)ArT)B4IN$9pKGuhQPy{WRbH{W4+$3#yA{%%Vg#9?g5QWwt=9(5 zFM=pcm=5=zl~^Ahu0}tHssk6PYL?j(60qU~z8PBw(ZMe|QipxEpKvWvekBxxnDXb+ z!_N*N7=zp|aWtany-aFBESCJg^PUQ1pEG7|=4x_ONEx^vDqNZXu$jl5v>Gon??Mkk zN+l&H+xg_e%jEGam54cg5Du8hw#>i@hvaOp*jX26b4jj2-}HAe?cM0ZMW&>U0<33Kf!t$dqr~^ zW`PS0SGkY*yXRj~hqnryC!36MyYDeaIcYsEd=%P$!#@-e2z$Xf4?#^iuY3qu{k8hm zVAdmzfOD(onr6LzM-?-Z+5xsgo zg#UE`GaMKNA?5g0uedS?4#F|JoTLCRzgbv7+Lgw~AuWOz4x=pvH z6}J}NoKqn+ex~-cjnA4Q+c-k9y;@O(LSf$(sW5?6hEhVwL^cwr42V=5E+dTN76MsR zbT@ux(ly2kTg<}HW{_J!uy?JGiC^c^co=C-6_fF5)Q;lFvB$kezw4IzHzlPF6XrUv z@`%i1j`a)T@9;dM^jb(5)&xd(!S;8GkL$W z_sy1b<`l&znB1kP(78mi)=)*w9q9CMBfxN(gYpQeLer!C0oT=}L6odGa)SKFL! 
z{=FOq{CUh-`HDYg#JxxnV{ljpY54v_#jD|gC?-5P^FO@`39ut*9aP-k8|glQIlan4ILQakxlU6kQ;zKX}MzQ%Aa3TzdGR%KQ_)Vc^Mc7)U9BeIuy2 z*b6{(#e+$asfXP>Vw)d!zl@6dyg!c;78Qkw;{FKM8i0Nuc&FGmnqQ_G#|)R;Ej%bh zl8!VFOgS==8QCvh`!n~%QzP!MjtK}cjld+s;}CAYneTdk!20I ze=|2jST?RrkoFP{E^%BN9K+z#t*~VS9#XpK@o*bAS@ppmmU$O{34mq1E7MUJ-#HVT zb9ePq@7*50G}9oJfZZG>JdU^Nb8ot+#3``vU*DgQ()w2w;1H#dQi9$3+4=WkWmu~j zB-_qMUSw47nZ@G2fBR%5sJyn8Dxit6xR|{$;Lxv?U(?7a9s;fSAA+i1hmwux3G5F4 z5;r6n1+B{j);?ki!ohM4HWtMZ$^N-nB`2Ox_A}`r!untWF}({P71^k%bPnw@0yu-T zNW_N6SS$QT9)8zkt$FIjWj$1M7#97qGbYSGvVLIV$??h1&s{D+k56zav7f305F|TN z;e|z{;@Z>&4=7}*|EILkIPM6E{)c40$5iPXEtvc6o$W}I(#Uj+ZVAQTE7N~=-0yXE zvPagWm%Nw)fjyk=r@p(Y`=N|6S{|58SIA&95{rjff!l1F{_Fl1hA1b)jdM#rh>)Q&on^`iL5Xtm$w% zdV^a|=qiF5P?v1+DM2A86s#(Ww5)2gOPa*nTZo7b5Spr2(5L2@1naSSX7_8csj#QG zbn>Z%D3b$!HFN?E8QO*UwoLV8p;2|@L5E~PaaFXyJ=ZlQeaS&6GQ=0e* zKxi;1CXOg2z5|pmm+?MGav+bl8{0W_`9<`I!By=&7Wi0#H3$*Fu z=-NYlF?#~vMojHCM1HG)FQggpTZ^+~VTf$3q~T^#;E&5)GWthEec(P}nhZ3zC*y7S zb%##c&ueNIW-LT9o^D0ft`uZ$4VMQ{AbGx_TZqd_+KtpM+McpKCP`QwtE*(7_s>Ol zKp{e{@3h?Kq^npgEVnO$%GgjhrM8st%yt5*n@CeBpu$FsoEr1(*>z+sd>#Bw$rfLg z5~84+%3prPVCL|WG$cH2Ga`T~JkJQK5YV^h%13U2Du4`OQ=)P&kNB{_I{b>Y2S#^5 z^&$&fo#X8L>3G`}x|7CNV|FbC7h-eMT+Qe322wsJoqxh6;vw;p`ictk?I|Q@^|mhi z#a};nIR?}`J31pcLGkhNNS_DLghUA)C4&!0II0~P(M7dZse`jHqe9~9jfOpALPElx zrN_wI-`u{S=$f}$Zbr2~G}$XQB7S(1G{kN(R26Gx!y)J0llvJEgG&TgFQ-0@@jU~5 zhsfR}qd-j(9E?R{E>^9PPgn}Vp>KZ@C44gXg6#dkPIY-bs z1`Xd2H=~Ls-hvJx`x~B35~~)7Fjbs~E}kU?%Y6`*#{S$rmp8diUj<4OKNi?iQYzHk z{Y#*mJ;mDro}@Jc=f-ms#v%jmtU@?b31%};PJ6Zv4yxFJAxx%_zC>t- zZN9S|z6I$&ei#exzd#K0`1#XWrZV!Pv8=#`4I zTYE+2nQSDPBHVJXCNqN3B)!s5mYbKR5^&X(d&jw<0l%Dngz$+Tr0c#u2>;Zxl~6=% zse{@FamQtn39v#Y$Mcl?dx2-3DNmlf^t+&l-r8!txIo#okAq?@+uL7A=pT85EITP= zW!tK@@A6uPB2~ky`H#>hqS2Te_iBiru>B#7i?RK4ot&D2%_Ge!f+5BGQYE75g%TRQ z!Q(7$kE?I4rsss;Qdmj6hG8l%k0mZ1F%yO#g7nv$pRg}~w)1KP@}VLV|N13z{per65B07%6n)6|dVtbyO}qAc>f0oQXB*3| zX8NM_!Wowv9oz}Pdq_hDCrH7x#(Q_k&SW>+cLQTfk*5Xvr#MfHB5f6DV*43`gV!97 zx4l2yl$&05JNj$d^n)`K<}k}B>8St3G)9^VKQ z!rASYj{fZJU8$x}Y&kyIE%fh&7J?rgMC5fMW1aVa8#&5A(0wP=KYgq~ME+$^DInQY zg3*~lW>WKzmsd|O>GTFkuV6hwdOD9VE|O<51LVTHsZEG&uDLJ~XE^#-I!FEf63ODz z((|bN0b6r}Bx^@K@X!@{gpf?$XZRaHD2`s(KOR4kUa&MFHt?iVGNgTWYUF+cN7w(A zBvw9ih7=~YSJox5>#GS5(~J_$?pi{U9jRCwPOG}dN?s3b4k(J@^XZ9{5N?!-itwXj zj`6k~`St^Yt4>|$&UsQLqwIZN^*xFQEp;AdO zx^-G@{72|cSSx318NA9o6;%wgbmz=XxN0<~d2vMUW6-q4+3^#9(kf?E;t==o-tn$7HiT7^A--c=aa9mfHiT!V zfqyn6#fuL&82699NgSyPj?3TD!#;0{mer1-jR|spT{|4ERA2Pkr_bVnJ)?c#YyYqJ zCOe8>PiT?9(RF#9S6zLH7^m%t8u8;dDtgJP)-#(8O)`UosvE57td&#gA54~=`+6E? 
zj-K`Za?8r>_iLQFNI`#tc=inq+T?+f;wv0nfd-l7_@td>3Vbds(R8dqjbPx~BKF>VEPkH*( z9CTjM_ZxT&s%Jf$j*Oa?eV5X0-az|Iq`?6(Oe0HqVbzWt5v)amn7E zym6D5_+Zxiey)nh(7Vs}^u{g4R;FmGJ7)4%B{Bz@pakyrI(>BW4(Wq@S~R()fm!&C z`M}laTT16;QTu(KjaaB?&Sr!`1A2;5l0jp_HB9dEF_4Yzn*n?EZIY-h5F;>g2|#Ev zFw4q?KjDAqa7+@*<+U=sE;e@QYgS3gySa?rN09^1OD1LW0xHul(5nQf8SbcrCUf99 zPpH`zc|7kii{QYALea88qWs_-0_LC3GufF?&JZP7Kub%Yth|}M_Ywk+vAUn~v_hK3 zrJdTw6xduKyVSLJp_l%<>T2`J>IeH^%8;}%Fw4s7M^mfoo{(VtDB9<8O`GGc zyn)uCVk5~7k`63l&YL3B)-mxc1D18v$dew1VI*s}q;8xF!%!d{iiP%gnCK9TJB3b* zQBjHIo4KqRp`$%^S(({VKe(IXv!|HC*I2o;o``%Gx`%cpt2=5sIv*xSW3-phf3WZR#ySws;iM*%1 z3@0b&7KjolpfT6&;(4scKYmP3%b=pSA+PmoNTUNrC8ixC?mP~yL-D})L`S=*I3_T# zj?ZeR-&(|UW9=sUSrLa`FYZ^J@xTA@C)P9@3gdY}0WS2I{3x|I724I+r4Ujs^nP@@ zsn10|g0KZ~UfhPY#Gt=92#|myr>*dzh05IJtmzOoV2@eoH+WM0fjw8Lu>Aq#RS?Tfd^QGDwq|UwG7MgcEhl#bv z2=k(f1qt@b!R-IMG94ItJ1Gp>xLO?kT8XxRP_4!EP5!#K-PImzUK_;Ht+^gBYvz`$ zCs(-G(4L;GNcXxxVVIbW@J*u5(`-S7am#DenmX2I!;EmiQ9q4G(rU<#!4O{#9 zQQfy2TW^Vdp=nm>_LS9qH{P_895MfHyUIH%hC>K^BRJCCw7-pie%|0NYo?7_l86ht zJl_!L#6T$ry-5RWyaQ;m3uf3w~Xf-umB&WCpOsJ9d;@_8|8yFVdKs2>& z7CJg$`JH>Ojn0C22KSM~h8S4rkG8@~0&=iQG=$6o)Q#jS4_wuB$&c4hp6&m9W8zxY zW!K_f+1~7sv_njdK1+`$E$%J_GYCAge4R21agGMn4#7-#nmQEFIg5ajw+;Nr4m}M# z=>!mT0z;&p^1N_vZQ0{`(3qhmsmf4{oSqO%-FWJ60?LE!~KUFSPUPIGUIQ4!`r+tq{tc&h=^ykgFY z4+lnvM~mM)K2-T=bh)NJyG9z{+Vzl@7%c_bqzIHn1|0Q-*cTKK%xP#KqUyMDjxW`{p;RO7**{$$+=n6|hvJ&Umm=0;F ztn03%{W$@Lc5UVdMiKI`il6v+xty0PwmVKoP{skC=PVU7Pb6%1G_%CZ*gxqg95Ul$ z!nr8osB&(Sfp4WBRKp}jAv8xEl>laOF_I14he#3UT0gA|_*|Iv?A@xFm~#{l_neP= z`jM^SKvt%Y98aHkwA#R}GgaL4?oT|%-Xq50R*l;e)H}#@u2pns>g%7^(b+y+Ew=#a zA|j|KSM;?&6sE$&EzWc;w*Z(MP9VfAe-T83Zw!kHD^c7e4PU6NBJ+-2ecP+MG zhI1(3pMMR1{rcp3#qg zK|sswzTNhs1a{Y(7<>s<U1_Qhimm7rEQG4?ZOj@f4}!!brD?nav9DY;JW%cHeF zY_~A1jcpERxlX9n2I#UlTr!$|I>2-#nz`pDiFGoDb$-<4YcKg}*gkvfi6C!*Rahd? zd);;Ey972IEl%^8ygaE--+aekfMq%O&UK2t`usmFfWx=-!K_LH!ZhCimfPzES?vmL z+!&au@VWWemC7OG94$<2e2UABDe$r3YIpx}Z4M8QSROx_(&cZcIYo^)*bmT&(y|%K zl#o1m`82Cto;BK>dB8>_hM$^kV+Bl#_V>TeD(}W+(&RaU_J@ZVjQ%coj1H1T*Bu`# z7)tDV5x#o2D9k|PH~4A;!7%e3*oaK@0zR*kSYf=3h=bFo!E3!43wcbl#jZ*gUJsP} z%yU~>nPT`)t_;`++~%iN#4AO9ljaxhr~beeQ{sN5#?6mv`<~VCE!B5& zd+*}j3rWv${SGtLP$8QN&23VqW``8G(a>2fCgT1%n)`G`S1Ui-iMBW-{e?-)DDiTX zgT1}HmKNE>277gi{(X^51_-NKM@gDz!CnYVZ6lW@V_RKL7lq=h`>_ zOUc+|dw-V@}EPar4BEc!vq!F~$n8MoY7m?%&TJo}?}kz4a`*P}jF8AUXm zR^7{TnSY$sb|bv=@z?a!VSjk?6#cr}sEzT;b4aBE`OAg&gR#u)K1Qy?Uj}#5-8OXxNJX-kQAQdB42eAV zC6y^APu^)5e2KfoBV;OXtFEtz7Ti-ZAJx}Ae4Ag@Uzix{(IB4vmpeU z1_qh^HAu^!>q{3DNRq3Dz0#MUmmAow{g8>ds6TBZKy4}#^Y>&j8-3}W{tWGAN${{q`w^Yx@peL>D(gR$6lCnGs4r-G7=^L( z;(7!jbx8e29RY%3%qqtr@PuJr!I6vsVxmqVu!(Lhix=b-J!k_JECh}HJSB)~B88WK zmaOfM#4V)nQ)3e*;njbF3HJm)IoGL-S7nV}(u2nxEfQ(y`qV&nJp08==<_Erwd^s$0io%Y^^S8v#gk66*7HG}ZH3Q~i; zMk-={zpSUhMS(78u@*+~u`)2ipaJ9ZKTGvPPwpC@f6k(hbknx}_p1_QVrr8b+9A_3 zZ^9TD-s?MYZJAD&WvS0mA-XrEYP4`oxoo2nzNJ`i8bM2nvZCi@V?QN%IaH3<(Tn=+ zZ@pNCp5<#EYj%ato_eiqd-;@7L5Py~@ltH}7Vg{0eT##-6TQi=dDk5UlxqVu>%kjTmjty7!e=xwup$iNfDy+Iezby&62UqAEF4Wq-HEqNKPO znpTlEjr&71is`c;x*spe-+Tv9OoQ{K zf@BZ$szf>csSl?npzCq>!+CFLHIB=IL~&BKdx}gpueLk-uoylxJ{0{*&K1j-A%bZl zQ6}db^0td3+F38YNOf)cZpgijznlA&#c2BSGvAH(nrE(&4eqPDQvpZow)XZzaT1`8 z#BkFlXg3!)UeO2~j7zklhD$AmVqK!U{?PI%Gw{oKeo<|f`*P#2ww$ErJGi<%<VjjbazTf34(xt2m;bdcPStU zh=6p7gtTsbt=39n`A4t+ zg^A^hP{S%5<=SKiP%LITC5dg>>77}%4vtuOD{o0Z*a@P?HQP*xml%%+5#}i?pzvgXB|M7^? 
z%}0@}MdQzSe5dA29sA!azF(X3%4>>yVry#pEdHGm8t;B{py8k@$Dduat=sHqE1OV* zkwEGmK1dDrHeO9h3Ekyn_?0Y77i}l<-EVzee*vzytEj55DJ{5zzQLHhkJ~apihS@j zwNw_(WmW3fs>U}Byi{)!hP)pWo}UFIk5ku3{ZJd_87KICQfK07A~TpMKPE*=F(-=w zO2xR1MX9qZtgr2*8ApfzX5{Cyv9mXmsf+tFz!c}X4@hoysp5$7?mg#Ulz!lLC#U== zca=@t%7XpZYj2u(O-oWNG;PPEK=MumCh|?O*A%Arj!Y`Ypl9h1N5IT#d0)rDVnzm^$xpJ% zv8cwAKXypX0ORBci-(^N4BUhHkeJ}B-3|rMw~>*N;;O*rtCPU8<}zxa0Xkyv*q@LI zTZN}juY=ik5NJ4Q0Iyme9zPixnC)I#(b)a}P_k1Wb1kiQ{~svRvQMH4zJU2~*Vkgy z61oX4T6GmxZ3dKDX&LJ28y+MM2s^ib$p{?1oR# zApgNU&fN@^s$YNOgn)cLb;1DhnFyMmgM2k6yjSk(m?%80j{1TWmRaacm~VQ$L0NOo zk=1)i*7;5?ErK<}ixlyXhZEi(+9$;7k_Su;AjVu;AesEVg&Lh04S zg{h+Un~8k<5E2g3;{$$PxY5b74uZ@^j+U5U_-sBm+M(Km3W|-^nZ*mnQWhP@Y+tDYJ&mi zq@6xH+4M8O2=}QRKBpCcf;-vG(8wsp6=tNA)0;ApHp4oP0fjL-3dp~gxODwMv7^QR z#M+t@i9BybV4hgeeKk?$$M}#7EEg1afSwf1iRp~aeV#-gWFsl1fHDFPmQ(U%XRxgl z;Ve|GmUY*7$GmD{ZfXxzrVYbWHj*lDw&!{r7=8Mne(CC@L9LPFxCQa1Q8{)0-MP;r z))W}=x20dT;-?0CG7xJ2Un_?T?Z<+FJ`ZIZu)U*wp&o$6!A(pUdOh36phcniik z<>#LVI^fc>2Q*(rO~)$GExCkm!mH1?1IrY6wlDY&E}#fQF+d85dEUwf&L(ODkoP&b zl#u2(-DjSpBpT7|`FKgfjz^~-tgRGZ9h1qS>GV;4ouA^3#Kv79B(u3>)*yobf>~(a zCP=4|7Gc)&8!g4YC)N<26l`TeN)@o*JJUSIgyRFC85pkBpH#%kVi?Ae3h z9BI?T9Q9_|7B%7^v@=f43?LEDI(U0az~J!2AI{LM@Nadf@W`~@egT8>*;v-}C~B zR|<2>d;Mo$m=*h`+!txTCfkcd0^x}@J;UXvrJ^T}%VNOA8-7f*6UuBu034MYNt~N?S&Y?L zCeYkb?CXr7&u~$Jj80Wx710Ae(o@SHUlH4bYGEMS6BI|`n8wP=7YTva0qWt3Vpf0* zX2As{y5oX)pNS4J_l>b=UH#g~{YbLVX-IXjkEw!{$T3A|%@al43@Lgztf-v6G!{k5UxRMQ09~Wg(!g5@qS_m@ll4cxw^-pXLH%E` zDCuIuM5-vT_WQfD5psyWlW+y;)4+DU=SNQg03oN}AgHy!FdBe%x)n}0TQu|>*XBDo zdPJ)2D0bVW!OKas2O45fDe)ehzuS4F)9AI&VPD`0TCblHJSI? z(gFMqX-s!__u$X7;`_rV$2Badm^5k>$UXRkgyA*OLHV74CbY4Im5pRt$C=5$#&NH8 zJIo9#;MXh^p<{|b1F(@V^h%aU)ua@x_kQD?)Pb5iOi;W4$~~aSy1<*#@^?O7okj2j zvLQA^XZ@D9^9GTN*wK$aB`|hv$$ins@BJ^KS1O=}zLJ^>h(hQc%@A&Zk`DUJJ2Xh{ zHCCMV#K{|yA?NWoNJjrf0m19RcR6?oOm)wWzamz)Kn%}5r-ks zYzD=^Yq%`8{91p;?(^fv?IFOX>;r(=)9qVu!#gE2hzyH$?vuz*9R1FYIS+o8>y5AY zpUi?6!eFGtC*|UPi`Bsur->L|I`zrdCnMEIHLhmEx;4FNiVF*$Zda}rj5up=femg% zL<4h1INM&$QBTbYiLMQc?xCoic+qrwqg}_T_MezL&WS*R91SngJKUYn3ByDYrs7iM zm{Ssr-wVc$o+_6O4$RPDRFVVG9G8s6c%;N+q|%Nag>doN<8|Ktg;N$0b}2uQmoRO^ z5#RYH!`|7w)gSfkC5r9X63VS80wZ0jO|qmF6O_!tbEvK7FJV zgg%Dhzjfhxp*MY~oQ!}{=K^Y@jZEDbhQ9)w>mIE%H3^{%arL;(<>C~$y}WFNY!ncp zuov@E;)ic~tCrEGPGh{$GxV|std##yE%tFbPiY~ z&#i0!zLSA&=;4}+&3J#lsE(EJ%&%^>pjNA(*X{I4TVHK?vsx_u52V7S^lnVt(ex_i<{>d}-em08Glz=U_qilg$<2E=R-s!#QYYwF9~80w^2iZweTA?2bO!{3b)h z%={CA;U;9XreH2GVfds}mlU_jtN?qk-`Q)JDXraf$z}BxT<`9#@p@q18%?Bti%u>V z+}(b1EqteF<%tk^_?*4e*;rvd$IB0v9k~w)t$X#pk`|?kV$0tS9;T+U7^y+JhQD}P zxzN#!D37_XKE`N#5{`F4s#rESw?@nvO^0R4R`@r<5<)O0AP~SfG0;X{Mv|%y2+8=!k2@t!*IjAU!~~uH((4ll>VY-!QELZDv|} zesg@7x*G!}(2ke_s7}F50u6N|sX)DW3MT38)RmRp<9B7%dkJ4Qpf>*(2}cyH>Q^Il zrIHGYfa1TfvTGf!oaJZtRfLXUsJ${V?90{6x;Fd*J=aPHWXrz2UsHe@2bpD2%`ykV zz74QBD*HW$FO($%;LHc0jpX#_0bH(Okxy-df7$v9egY)jCL#eOp@f6@^T} zs>&%6nlv(?_;dq>X;0eC%V6K4h+Eq$YwUYQ5%}wuxxV99LGaOdII#JgyMYoRRa#me z`p$!+2#drN41xSPyap4YHmg9UgG8?5WL~ox+Nq@KDEVvhk(9B!AK`+MA556!{XU9u zx|t%OTldVh%kgrCBusau zJoAHm6(O`Wot}JAevRSSV!UE z@!0tujrp*ip_;(@2~J*RaU&RH^+4GflD>mBGf=?lm7C|#AlM`e-8#&-q6c4~Vn-=} zWX@Z#amqKA{zBuBL;p_$&+KM{;Hf=w=)c8{O)eA_erzMyo2H*Jo8Q6!DllcPyy8#U z-_8H^Y{U5E;WX&QtL$BFtX*%Y?NfgE{A=0c?h;UdzcoYLknF*HiqLKC?6CMuNnD#8 zOX_4v9RTxM+OzNX)lI+wCwvi9dZIMZ-_s}>!I`JoLcB(E3HW|;F%uy$hwEwG$?jPA z;$W`&;;Aoym0XvSW_INcouY(^+qacdh;cdv7aLskv2G-b)ftnSBW7eK@-iPW)M{}} z{TqlNO=2A22Eg`OY_=~Bh{q!7r6gne=K&Q#TqPheNtd|E?jTGIc~c7l8@M%C4zi3p z4XPqC!vPUMaUq+Sh0;Qi#!)P-3NEHL%@cIviVi};!gGiz&Zx~ips>y0OqJ$lD?qgw zpK<)wXdQwT9@r8L2UPP=G+1$Yr5 z?_878sr_kjrz(8x)x}j(y5N#Ti78H=%ri>J?m-k7Y 
zlZ+tGF$RbP)I_?~&zK1zpj63|%Yins@izFr#1!wf(w`E;TtCE72n_{+p=v%(*&d-L z^(6Cxs1;!&#ivA|XcIFGZsH;v3~V!T6y|txM!yP?oNsC!yXjaVI6xmEr7G?&O4ToOFJOHNVz zVLVvkf_I7|1+%%nR4vJ+a$BFyRWMT^d`<$9I?w6BzrI}7YqIaXMbu(EzNcQ|#~ZES zTml0H+acx<<_^AEFTo~&+jsf%0I{lphshF;3&T1^Kw+S9|33elYQUTzSeAGnxm0}o zcsq{i`}gn018)^6g=};~CT|};cu?Vc_nt{XUWrVc?B_D{B;|+KA+AtFeF3gp&|${J znUeB{7qTQ}SxqYh%u!le6uw6F7pPq-EPt_=UfE2$*pXS-_By`mLYFyj*0>2ro|0Ua z=Klj+5kPPUqA3h3i~-f53^ArkiU2O)F!OH=@l-~eF^xB8q6lLfXl*WDC5%&k;|9eVNj?^G52DBTj69COsl{nCRiH zQ9spF1;*Hj#zR}PbDBw*lZ+09U!V9kKB{Sb&kSTEVX};OwFFDKucCB6Xp7JgQbsK~ z6Rk3VIn(urFZ~AEgF?!!4r}%o^ht0rN_?{N3=wJ)y^k7pbMU+XS3FtD@ zRW$>z+s5h5vxA^Q`GG!~^dhhLe;-(N9_ss$!7>u6-#()M7F%^wNZ|@THVHoVk+|!y zJH!jNqo3X+s$1NN+%6w;R8CXklY!)Xo1h=(F>EJdY(B1jcvHH%Y@6b+BH5?*vG!R$ zqyg+1f!EASpysvR6YH*e3brrzYH@R1$jIObf}tOA%r zZqra8-=?gkRpIVwczD$Rs}0M{CAgeuK8i5%rjmKmOnMX*%6U ztNKqNk5Eaop?@2LJO&M;{Q%y!*$H7N4$O4xmtOqZe}TtYB)aZKNpMf7Rap_HqotCl zg|gdyQ8<6aHiN3@B_BRqUpfGxz&1H9e8*oceeLPpJ^hnP(^j;$S)~Eq+M%!&bHNtV zu0PkqAu3rb%=TU7JqKuH+g0H_2UoDSi$0A~>aoZ}H-d+r4R5UG&-IkK@?0RNSoN4& zC}QH2Jj93R%tQ5WX=XJ;SGM6vn)4+9BmJT!g0w$82nKT1Vje7;JYlG<{q6Unq)^7A zDdtXJiGCzJ-27Wi@Wu4e7n@7hfY1cY#mE0PC^p5!_z6teoQ1L|I6oi4A_& zol}2N2rrmPF#wK!P*vE2xyTG?*Dvee&}nEzI2$+W{Mk&)2L8BfX_eZjmjBw%^I&Zd z%I8bXY_{TiQ`8cj@{C$V{+xvXxE!e_ZQ{t&@hTEjiTk#>`0j`qoW2NU4>EWpjt37e z1x?tl4~_%J4cs7mEb26?9z5`ZZNyiY)k3I93N2{;qFW8E3{>Q^WdSGG(ei$uYMajW z3IWZw^4C7?R7y~+v)bnf^kU(J{;D2%Af5esEqsZA!~B0i*5&26!p4xIwMgJIEx!!{ zb)cfMA0pNHW&V`nww+iK1wR_Zi3HIbMsGXkjkt=84<7v4+YQg=;3*W^78YfZfp>59 zW9Sb7@~AW>m$nG2`AsfdWoLj-$sQ6z+h#Rqvy=59VJo!?-L-J^UWsbS>{3bs(k@z- z5w7YLmUFm5^Nv#zYPE2Gu)pgOydSX;pi`{mY${wOZ@rL8Xcet4k$LGcp`-jPj}qj$ zFuv#CLk#oCD!`mZoKn0(k*v3V12ZdZ@7q2GO54i^)vvonSupCyuTI4`JuDoCB*@Ul z*1>}{30v$oZ0Y~u2c2_B=VEq6-VM>V{Co=QbdZ&R_O2P>l2mvj>3@FgQ1&19XFxcK zd)#b}2&MbUMXF|P9^izIgi@^&4^<7~X{TfjQ-0}3|7wQ3zJIKr3d~k83fQFUTowH- zHz)}&r-z;3g>0#MF)JQ$(peII{s+@1AwZG&!Hs-B@Au1l&}2j1U1a%#hF*KFX=LWH zW#db~=(dbO+W@xzgeHNzSg|Ol+KGM0GB-e!2W8I$SR;Uo6tNr$)Eq-YLzKWWVNrV> z1|A}rkf}^D0GEz}0_pM(IL^gzyHwkkT+b7gT=^bSvAark^-Y^O#NcJej|l&Wgv42W z7`F)!-Wg~lb)?!ggj?+V4w>*rsgDN4w}s91Vhnu#o(M`^5@R+6{i0@SU%NC*HOFrzn2h*z3y9$*TFL+=(;Vx&)qt>1Q8)3pqqN><7Q{>L!#C}W<|Debo1BxQzvrh zfb(QF{ESx04L)!FfKtEzG1fE~AA#|x8zcJ+EKc0yuF^T6x`2*ip`@SD1}*IQJIhz$YYx@HYtlpN$DH{gJ( zt4I3e(g|wFf~uam%E&9alsv>W(-Xg3Q~$284L@EZfNQU=_}|+gcwI#5XIOk0`b$Rl z(EFC0W}z9-0!bk2m1V8&dgJ zc(mx&evorKUJaKq5zKA)3Ynam=W^FA zw}jASk(Qh>->dT&2&h@h#!M?&r?(_MB)HXu?cjo{X_E!SVB^AJhxMYTAp;BVa_ZV5 z?{{dsdPX6ucXyHkY^30ckGN-_zOIXgyFxlVWLloEX15pnpndu@WAXim5ADF}HwM^h z`?_JB$VC!l>9Qsj?{(075*?NN;r!25sVo(pGRj$i?nEXV=1?~|!cj!QU)cyZzF8IEIxJ)zoxxK^Lm(J^eqj?V z7Z7kO4Ev&%RzF?8ZBD!@0Gc7KGf45CQtBR)#!0vCE(%Yt-}sVZIqVuEI)7GTRWX(b z^xo8gZ+`@_6@vnY&XWn*jZXBiFZENdPvYRlHLh|>B|PD7k=K5eKxp1hajv!pj$0* z26{ZWz2%_-G@1#Wl6UXS|J~g+Bc6ymDr?T;@Ld)idMc#7V(*JmIZ@e6Ew;S)BJ#-N z%936T_sGnXu&a(IfbUaF7rzq6#GH?D>4v_@wCH6n-4||Lilk6n@%_6%pmAFYo?2{& zY>p;DP=BIc7&tWU9nkk>`8+pelwF`aF1=-F_z)OOVEF^>Pl|I+W1&IqCGafNY+MA- zDLQEAUbsyRBs7p{;D=@(MSX#r$1ZR&!i@O2Jz#{=ZE$Hc*LM%Uk{h|w&@yT9qH6Cv z2POH|zn{G~wpIvqbz&5lw5_9|{>L$36VUkG_wEY7Te%h^9sq7bT)adm1US$Oe+A3u zgf*QjlvfLOisZ|V0>zstk>c`d!J<7%E2m9@{a z&kN5?z^7c~**p%4iFpeJWQ$B7nw7pDUf-L8l?>oO3Mh0C@1EkiEj-{{Jv-69)A|2r zGRLd$Z)sZ8HM8kkGNFXmU1mN~{G^|0@NU+{FTTFCTa&^^Z8IToHvQyfUdaakKmNl2 zy=xnW-Py zU<5-t$2*-FfSPJ4a2nQ)H)&At;S?A%b)EAWqioKp2t6Lyv|Of2G!1!DQzpNyaqV-F zLLZ?h0Gk0+5pns49!I(P988KWKr=mOickXc$!7|mgd~>mqvxRYF|0d<>0O2RXs7^y z_O$0_@Tyh(%|5bM!y^1|uKLqmnp)n?J&21V9R~yPs)d&iHwKisXIhD*+Ms&8+Wo9+ zGmgmwEUHCT3d`-m%PbQx$jGl_%=6TxA7Rx|;f4n*?LQmEB7S 
z7bc92ReF<0dh;&mM_vSCPB#Z$*aWqd8MF{7Z@O@?s3fB_R2nxBsuS=6Sl8;d)j(3V z3chX7lmrLLG@LXwR_Lli+ZPLru0Q-cMKrbKQibc+%s8c_!t4bOLWRpUTJ*?5cim_U z&fqt<6oA5NW3;*Nakt|ot+VS!mGlvDbvlZ# zT7w2VhIZhs1P){SwK$D>^$`**d?WScC`R0M4SI2}8s zdzAggbJ1>Z@)3Dc+~AESXiT#+Bu8Au5se5zy7k~Ti}SXBAKF7)2IcCGUKWY8kT=zJ zT2~`6MZ;%5%g2q;@ayJo714LGc%lg{y!^29Je3bChpF!LW`-}_wy%MM?w>bi!rBGjq3Pw2iR5r6^=#^6S zsV@Y_9(-FvLQRufILcgH=iS3uk+L-#$y^ohL`TWs|T1?IIbvs zf(AEa4#dOoDvalD2QhvT<^re(L~4KC(!QDog`Xe4qGUR=7PudGt6TGgmt{29>KQ0W z{u-!6H(%b~ljMmhYrw3!58rF(wJBF<)JTWu2s33u0XbmkRyZ30kl!JT$sMfnM1XGa zi~@|IfYyeH@I0cV037nF`5X3Y{}n5N|5wx1T`@HDF4}iKok*$XeT|bZ8uQ_i&f~W( zTv&9b#b3OPWdrp++tPkKJND|*LoXSe{}RvZzBQk)2!y8BZsIV*!fWN>RF{@6d|a-b zI*c)09?4-FY^5GCA4IoX3eSLEPr~p?C8e=I$ssgR*e;W6-B!+{aA2-qKGn>6M&6J| z&&;BLeeD|6ct9y}%jTVRjBPGfWj{Dx*<4K>u+jyneFZA_OWi9!c}_>hub2op`KDgI z!iSWJ>KOxL?%tf?`Zl|L@Who@&n-%}8 z9l?E@04xvZ^(=H!dqLO=LKjxbXodvyxtOO$)uroUF6Fa&xdKeP^Qv-wO}&lq2colh zmDgz0?Tn_Jg6B*y&6*>76$M7x5=H8;z8gTg0kW|OIG(OvdOo8dbw~oe=0GshS-8ms z>m(pE52zD6yY#V-vATa)6r53oCYn4lu#6cNE0m{Xni!Xc5|-k_ZV9zN7xS{diR)QP zI5CM|^ukVXMSzyD>w1RQ7Os~hy0C0GeRgg!PcD63t< zL+5c+@{SpH1{%xxRgeA(-=BVN#rcev??H@AVd*+D8Y8C zj*+7!Is%afE&U-Nk8Ue9ZN2?ZP9jEz>Mh zI4*x)3$BcLXS+6C%W}lQ;W6p;8u==#ZTSJKV>A@Kjo8~!)^CrIv)3str%BQdDgu=-)N&05uyE5E(<$?H>s}dgCIvXSV{Yl8)NW!u3G&( zL(JKV5jQWNL%Y~83TZ|6X~3FWO3ivZ1^B|`!%F(HIDK|qD}K{uTv+9u^~kwb#(5rY zWKytBt}H|y5+@FhDFs>ZY|GYTJ>)G&#k`6Fhj!mv1_rdKTN>nuVU!yTGTA~vzgVB@ z6OzxCLZ^u6^_{`b{D3R>sf-17yp-=+2?KY;c?=K^O##zq_}80%96OV1q3KlBzY$9z zU3j1w-AsRR{$`{P|NKoqGXi=^8e}Yu4-Bs`{SwCvs1nkc-~reYN%nvfEek9zu*U~u zy+iS{ZZFWc|k(PdaIbK?R%N2>;Uxw6ZUI(C;0Br#aw`M2%yc8kJgtVUbXx5!MuV`HluOj zJg~u?GQU!*)ov;bN)qkTL2JBE?Bnghuz~`>NwY?_kpUIUH}zs2m`3pQVz+7WP)%f; zEmkEhY|9*4w-Q{Nb%|r$4#(Dh#}-KrAx4;o*XBn$x#WQ4F4jdK!9c3K9)qQ)QF0E$ zC1bNj7jXXBd;khpE3?1IIr@%0Kj7tEPGmRKJ-ZEPYv`DUSwXKWn`}+yd4B5^%4-aF z<8n_+>weU?htAS=+wl4+o=11k&s204TU*IJOxg@U^@!(W4qK<4S94t=5J^a_WHi<6 zIAipovV%g(p7PHDVep;Hbs8DA`A!$40YO^phsx-CEmRLmDN=~qvryOfmgDh!hm~fM zo@bpl5ToqZkpm_YqS*(Aq@pCUnpQep=8H@kU2uKw6!&L0C2c88_;IxCrK3gG8nXaHv zLKyEFhs#s>)Rz zkH^3K<)QMJ?CWeg|ANRZ1q}Ppi^b3Lkm9oLfH074(TlgSNX+R;K+!HrC6uch7Crm} zI1;6>t$u?H66b3g1WrM3tSo8$gQgf-eQ)~{>kW;Td2Sn@uLyZeGlnWgcw5<1UK&z1 z^3*+~)Peoe-(vT77j8yI4tk^C)gXuFKRH5|u5n%|fuR&_qPK}lVB%BTHmC#;m#L=5 zNd_UvBnB3;GYkm1!Q7;YGeqscqN^VbtPiaazFw|Ii>gG0sgbq=wqjr9}=fI>L&`&o*UCIls zzj5e}Lc7u9hBPx{)>+?V+J#Q7^d)p(HST5A+Pa66Pe-u>n}vwcO$3!qd5n3K@7&=K z)>P$tRTF}KwD}wz%kSYbjZl`1UM?3~9ifcWaj$%&yzDJ;bg)L> zQZ|_i`Km5MU9wFLO6q(|dym<#b;`S~;tof9nZumesoyssWF8UforFOw#@l(~19#0#|k@?+2Cd>!Mk7Fz%a0 zC=ke4BZjuSCF(f5J9Z-V_4OzRn%o1u5%%$r(aBKB?IxtJx8HKP;YA`ZTTl~`&jv=2bY`44 z(%3g+(7pwkm7Avm{eY57C0}K?KPPvx&}_A>Ig$rOrC7eA{2XNb;NZ!+4%yK67vJwK zq+|Cdmj`KW(-w1bpsGN&BMuC08ULFw%?5G4YJwK1J-CXS!1(~AmDcn|Ff|0@q{b%j z{=E-`9>S;UIrsGO;2~4#TyAx!nx;t)&mkRJJ zB*=H3!;qS_HM)W&0Mw#VW#vN+Z_ULt*kjYL6ODZ`s<#oN8t*0Pv$LiVl>td|Sjc5< z#=DU%wv$@?qc39n7oMpHTByhl>8d)d(1jO_lZjY@+?6M?S<@1niebi5q(={sZf1dz zM8{##;JIh(fF|OnUY+#s;g!j*Yo$z$E@3e55_weYb)aS$l;SPv6{U#n*5&$kQ z_}h@5_ZN}jaAqB!4VWCm+f4zp#DKFiU~SEoi4+&{mh4JkKPO=Tb$Mv@j2fyh%6GxK zd}kMN7%&Y_H%JIGG`AJ6@ipX67 ziaIyeK}=T~4^kWbhgYs29$jV3cZq$Ar_pr%^GQ7}jaY2O(?(l4R+{M8n}?r-%65^H zyVTT*hW)z1nq@+;<*P6b2dDj?T@n;oSvU;D_vaSU<-cgEw{QXzo@^Hg;_CR zYMuT8c47Xiw5eONsY(Mr@%J) zdFi-&0R&bU4Z9zUqeO@pvmP)PVDn)KoR)C;e-$k|`=x5#QMYvcTKc-f&6z#zo_gf` z%a8jOcPHwMXWBuXwNA$MJu}Ct877RbB0_ z((!@m>~#Gs8V0ac{N6@j@HwChUGV^I#|B()(ny^0QjG`gMOvj&4T_z5}H3EPCdJS#x{vyoS2-il&tKK z5{~~JY4+c*{~XESbd>eQLhO`Fb5HAAMKX`sw>tu+N+*v9(^O@{%Vt=#Xj}PSd?SUY ztEi=dLL`kcM${qxz&wrd=s|1+-&qo#@Gk$RtiE*bC85#qB8N#wa} 
zCk)%W49e+1m}v&(7&6Vw7?1*((RTW+np)VFMoa!0iH4xkTfyH?oHiw>RfRWyeli@} z&Dmx(ewOOnwk=`H>P7c&3n&Ral1=!@#jS5I{cR=iGA3uHw=7v&jK;(W# zchhs%2C?rCZspzQ%w9RvD@W189G}-B4yM01jgH%mra=JG<&Mx1&oy{HXYVim0KbTm zH|xP+c_~+5ALo~BU9j{08Cnrn|ET%Tv+4Yc?-raT&rce>TuKhP^~orhjzEA#{-pPH zIN-!FU-Y|~NyDd&P8W<2u^lpdLI%cDj9sKt9uz=Kzhq)+YidfKEp(NqJ0g@_;9XLB z90PuLHBo6n+y(UcGDejjKSqrVxr2EXGW1e>#|eFtT<0O zl{{f^bAP{}M$C{;&MrYvuU4)F$1hUiWtBUY*jF#P6w-M=6W(Fs!4ovgSgm%G~t-K*g-;9i8h*TLzHZEB3^mojolKPvE#(e&kEm_## zR1lvUo({-0nV*C6&Ef@;J7AhTFt5(Nw`posS5%XK&zbi+nZrk)6)o-Whx8zUgq2Bb zbo6uh4I3VlqBI+M3&KxDEaK73zouXr^hJLMTdTk@IHZ8i4=fZcgW=G!(;#B|YW@96 z-y>nS`bj=-9W&)PCgcc2nr@*1Iqf60u693?!Fj7|r4Go&Dyc#Q_4R?li&BuOlt&Yw zo?qtfwYE`=JC}xnkOt*vQ0vkxpemA9cb7TqC%L-iCzXcSy5r1x@s2ia?iEJ`9cQ&bU_ivKa8}M$=4@CIZTPYh}n3^;gX#k77A_pc)PFj?9k}!B62;M zmOt-a?c>3x5wX9@d8C5lkGM$c#h~G*&Do1=xcct7B5WU}Q}0MYcplINzXF5AghQy_ zMRqLx{R=uzOwe2JRYP2(#6#`w?2H6BK~s_H@If3C@FTO|l&WK+w6x@mjEshcZJ8Lm z&+XQ}g~K!Qp=zWwHMN}EbZ*n__97XHR$ye7%n!VRnC(q}9&FRHr$^45 z=8qnC5_PUSFKS&wk~O~)(qL0^?c5}_(xBkvE^Eh$w6Apus?26fq7a zp4ilSpW1??W1R{Mzb$Mrt(&mLNQ1pm{Z3@XPgg!ZH4+QuvUYVh}M<@KDGSZ%&CjL*d?%{Pl-oL0H zaWvqm@FK2ne+*8GU)()1j)R}g1P`crs_V;^H#P*7H4XMtay&?4&0a8(eZ~5Ir&u7m zugH-mF6h_GEg$-qGIG|A)HI~=PI=;t&+ySM*acx|k)z(0nx%|>q{Jf#N_%BusW)oi zq(<*s*h$BE<8o5-5Qk#iv=d*Ia-!m(DifdBYph54or4^PQ|84du0v&NPV?KDhUhF6 zhtp2lzb@D{n1|f9uypH>XF6v{xKNDInAFG5ePwcn2-y4;Q&UBg?JAG8X z-@B|mD!Qmp_~2v&>NmjlqK$cQVmbqNcU_R`sXK`~w6oQ8VQyw8y8hP8?N9s|VNZk( zAmYgAg&>cL7eD@rpJ2y~$Pnh|vgNuOPOZWQd#ZPOBU9~*Q}=v2(bPgUH@2gN|9g=u_a0DTEPeibK^rZU>2@570b4mH;h_LoXx~y`XFC@c#|L8$ z^wvlGDe+yTsAGhDGY12b*TFNS1dS1{6Or};)n5u)#`?2Yeh5unSz671pnE+Z);Is1 z#lIe*=zhp`hRf>>X8)+D1i`futN7=O{k7S~hl15}b5he0Tf4 zlSM@OT;YADI~G><+Vj={#d=xGEyG+xe4{T&5?TcZ87ivVFXS>clcI;COL<0K799%w za;m7(@+K~p0IOMr0_!hp_wUah z;xL0Vs}s0{PiT?rX}@P>*zK(DI_*Yt41J?TG#u+pxJ_Lf-hRL|sp>!61sf8pIj?0{!D z(wf>);}2MZZpntf^rX_9Q|Xq~SB3wgc+aGR{myds>^S2r{-llY0y0h!vN;m~6_6bQ z20;eMsI11rp<;xULog8zZsYI)=q_lmzk7JOrluyOA-Z$!Q(?o5?#p1XlpvBY1lf~b zC$=9nP3I+wL0CiRJuy0p+c=f6S1l`>$oBf~RXly2oiF}8huOO+62l*U_Y}B9W8z+q z;;I0SPr`G{S2*UkiQ+Gjij2cVLE04pW3LDwZwm?u%_0-U zkvzs^X&^`M@9#Q8?i;nMe8?cPR+voGXdblxJfDc5%q0W-&a<8~${1>`t5fljO5? zB#YE}zWfYM12oaL1^>t7T66df166lb2`tkHq(bk<6$B!{9V+DIUrDG^s(xoZmlwA~ zW@&-UAYChd28ikO#g;!3yKmHTtvEDD4fDs`bfkeEmzVpGvT3hQAK-zO)kqrw>&HD! zK~^)IOZjTtT|CM4w6v2?%&j+ag9^BNikZ;=T}OFPurUkf1XrQJg{bQcT(Vk17H>*B zJLsS2#G$4UC7iK6`}%DAa|d{T3*KumTZO1{Orl~w+!@u`@%{aM2jJQfO89^oE^$

PW+_pq z=9l1cain+scC%KM2j0=X-aWWSKYPZG%ZnVnEm-8M5bGn~3W zqLHKFOr}`9L39-WH^Pa}r#DykYU~f(?{5QxQPG01>Y@*rDP=vltdRu(4ga>~v1|e* z*AJ^~USr&g4uTqMmYQ|22+c}I?qW|N>5k+XOsYlWaT_)_Ha1pS)b0b11Nqm$d60=Q z^8TxJe2{S_N*ca;IXXXo|8%c02Fh_Ki!pHEEf4T|QUl&w_N@fM_!5()W)~V@_R3E& z!qj)+FihgCEs#i3q~Zh2d-6TZ^?k6jCnu|E+s<`D(DBG-h4AMmBll(jH>%gjcHiPh z5Uk%S8q6Iv#uT3`RNB6~Rp;IoyjsMftHVcBbWc2-D?{ne^>+o<3JRUyhQBb4iN;Vs z4-av7ZVl|Qa+q~h};CQUgm(v zjlxc?LTqPs44f$-XAgE0j~1TtBJ=jneOU`5K#ySN5(9xxX%_?!lx*|uk^dq zJru39Uk$ldQeF%n5GqP5`+Y=!PPNNAWOuCI0}>VzJ20?Od%RJz#px{V#4_MWNC=M@>zQ zh@O}@9qmQ_{Q0y0$IkaTxFf8bllGd$0=R(|+B-L4E9?UX%#VhMm>Ba}-aTe`sd-Uy z{aZL@jpwRslTOj^o}1alqTf)VG93FL3=Ol=x~J@*XG{^srlCThLia%MjwT!w7YQJ}Cp0(HIsTcFJ;&J7vU3&=W9b4a?WAVYndf5o%z6d3Ci7>~RzXPp>bW22Ie?noR zWMf$yz<+ngG2W-rYmuw`qD;7|=hYY{3IZ;JXb-dhkUahktaKVC`+41edcKNEERR7`` z`LY8WsdY3a_^*>;ydqX9*s$@+=aSY9<0xE_Z@rICab?0#DpkG%W;udo<)fin57_HA z71&tJRwmy(JxaW+5?IO5xK(!pu0!(ti@f0&ctd0QLzf_y;1wEO7nG=ZU2Bu=rYOV% z6Gs{<3MGw2+sU8L`Qx$2QYCa7?4>8oDg=b)CzydVn|BFPsD##UJuP+}8jqjnJKR`g zNrhfR@33dT(j?Y?r_?F04>;6Zc2W~d>-F%0^Wf5P9dO71KEujDieqKQ54VLco*8@W z%c37ekFM(%=}+S3UTZvhb|V)fst1UvSz?fY+=;#=h*#XIoY+^PBAX~8=Jj+T9Zw5q zkdH)%3=%jkPD|8X9Df+Qr(3w6pvr*h;pur%GDeRSIzHOi)V>y28LUA~-SoR7pZb!& zhXwOL+@{O@*x$M*2#|_32{Yd4i*5-5Dg{f4D8I1_#oZ6r-rvOvv&JR;rrA_cC0?dV}FBwjl_ZdsMs@)W075XKi zb`d$B^P<~_nBpGfj}N{f<>kV_y&kTuu3Vif4OxR3+wA1cv`i+nrghk8Ic^|z&2nId zuGi-d`t|iS1@W|cn~|j#_Fv1E++${3MD>s;zpSmKe66ShZjz%k=w;B;xBr3b=vm9Y!X1EtMuunSiFAo5xcj2}$SKD(7Xu8_jN}*{9mw!4CSJU@7{a)8; zU;7%z&3zGEDwbnR*>KoRZlD#KrM$oP-f_vbUeQ7^`K9QVf5PSSU9?54$RQ_qiqlD9 zGeHf=yXlg^zW4#_Y$fmc?YuCz!s#NxKErVkM4RC2g4-gQ;L}}qjO0ZD&qg2s%5&}K zM@cmeTlJ!oZlj6%ie@X1SFek!vC3ia7U~S+Y5r~ z@xeS%d=xZ)(&WT*noIB+_lc#a7l@)qKe?*m8TS|dsO#LUO2fKN!2(N9gE{2wOX>2? z3&tgcI?}Nu@v|FZ5;#>D8`X98cl32-%{nVo@E=JRp2&%|Cy02daob6M&udmuKO%6V zk}%W&mJWf8unN;a`*rs*ceaSR%*dxle>u&`zc)ui+|?v}Swj3WyY-22ShEUE`&|Y_ z!hHin%yAz#gUJE41gmIfF3bfU{2w0KCa+iVZ#EAbuD$;za%Y*n;j|MM1CfcehB1B5 zqVlB-3D)@p`z(XMmLM&jYMZX}AG(_O8uIfl6AN=6;afo^FMjafS-B!Ys|LS-sqa&j zR%oCUOd?IbQ>*ZoGyBQa+LMjVcQUF7_9v*fX8E1VPz!Eg;f1!-=LZh1^ixy(e*0Hm z1rmk^Stq$(5>Vs3c}A)czVt2{t$IHv>Hkpm)j?Hw(bjZ#cS(15NGl-uq_W4F1E6+_c z*rF?!$sy^RS}M51Rm)2uN%@5X$46V(Klj&~?BAtv51Zh9`pJk;@9PVE_Ilr!@#(~8 zS0m$K)MbQrWn8sA#;~9qGUD23j*&k(sU&&oXyQef&<&X4$(1?WNSYGeu6V?mvd>Hb z!EKZ3s@f?E!A;ve->ut|bV zwt4elsP&CG#DsTw(;R71C!&P~GE`5yvVhCSS1cmX93I6+}^a+T2}ap4PWOk*5$ zLpG%@C;*VjS;!z*F=WZVk-U|)LVL8;DC!F zX`}6cT~|6Dh0@iGIY4YG?=^fZL?dK|r-lQK1oo*qVU}p{lwzX@aEs74OXAZPr7xJo zVpDK9u6c`x!1;tENktcxENbM^=yo&DDHnSWKw{N|wR#Ok0dQdQ_g`lHXl)n#f%kG2 zy68x`^f5SXOa*(hF^oeXtp{s_t;Y!GDoU zx9a&}&aV}vZ%Sq&wy2o%pM^mKJ;SD^5!DoukIWgU_qz*$8cki&9@hmiChyEj7w}!8 zW<7(ZU>77Ja z1w>odnQm`(+fMx8a zS(D>UC?*LX+X|iRA}#$2a2~!!Se_9l#94Z9B&bG#2YR})pnKKl42JQ47@R#SrpRZj375devQU&GQz^8F9Rm9twW|hX7$yRf|=uXg_(v(!zlI!O<8T zM9(Xm*F@SbvDd|3kaJdB;$|z`?Wcq~-5y}bv6&;}p=iK0))oGq;{qd2xX8|K{vxj$ z1WVmWz$Le`h8lU;+=`8I7WU*ikK{YIcDOrH0AmAX8kYQ-xoP}!S%7Srw34!>CEL7yC&VVE z=(Kn~Cq$3Qd0EVSvPn+NumsWHp_<)08@ zb7=7wty>{cu}scHpB~2+2bnJtp@-xD5pb0ItvwB?UGny(}v#Skx%- zVGR7lYp_UVH{!+Q&31{ajQ<#Ts#!~VCeyvL!Qh0Pf}E5hO<^y%P^I~) z!n{0T#dyIpd$RcpnAyVv_PJ_+HKrX38?v#Qnl7A?M=)}++GGdwm*g*MFt_|eNY>$} z+}x1(1K9f?v47iDA%hF7oKqP!y#Yixj+5f)eoZAZKoO-= z)y=db5piMb`i+PG-#sIZvZ5-$>-f9c(6;6fWvfh8u!n_OO5#gfLe3dn7 zNXrRR`M6ew1E-`++wlM*u%y27BkWU5@{$Co> z0H{t%2_a~`5%HCc_v=vw8mJ@#5WxkElfcGT5%BnHi9qzg!Zq>-2)Vnj$>lk5<>Zp* zoZ*A##3Y4&Wy_}0*5UbgT%VbOq=^X%s>)#iLf5a;fj_VyTp8pUsY6mSj86!nvH#h_ zHpq5+mEK@d0>i%c#GI`+_J6{=X$ytFNPGCVSf;R`b|tJrtp{{;5<0HNMLyc_iF9F* z)(f=-y;RD7b@XOEh}!j+H%Da%8#izFjIc6{!)jno^h6 
z2x-x+;UfW4Z?&npfjF)GdZQHWt6R1}1cMqcSIG@Q+W#z3R>6hDCi8!50ruXZan7o% zfj?1DFp*8sd8SFZZVMA;ndqZWXR1#R976DfB~H={FN-mRUOpe|DQzd7(exkV^IlGRj zQ$F}cf@JCwZq#{q_&T@|Kh>tyN=<-lP(kl3Iu-t;gaxW#Y7IHqOif>6Oet5@{{Cwn zbo018;Kr|QbNyreY3q9Z_(I!Ua)4~%sS2lW<32mgXyEw-ETR9`50`?s2srwF*6G{Q zNPz<0%l+{ZcY3e0;{Q1kJpjpZ`c}3GAjsI;exd07O|^xikEA(-(nHP!22jwX7ZGJw ztdyk&rMWbDGLRDinLztLZ`xjhVlXSIalPOgR&Nc4)9^MGD)XQ9ez<?2!#Fn`$2h8J65e9Lhw4uOlX zEZV|ub6baPU1+G=96MmXr2|)moa8XUDJM&G6{2p%L#Dh2I6h2)%q~#kaKH& zR~VK{8lcnB3VzKJSmL@Yw{x5VAVwAXE@Z>3= z5nJUa^)2uEi}L+SSP~kkbzW%Y=sF^Mk~2?lz_%k@43aqXBbz6N?qwTTku{fw+EEeo z{Iz3T9vS`J5NC{HI*Yhkiw@V}?y9oB-s?4lyEd+n!O1<4tvv zE~o%6Z!Cz7tAFGd}+)DF>3 zm<|?gW6g?@hOz$^zM*23XReX9Gf<`~nNu_yzq#lAL7I*sn$2@FGbM~^w_Pc0cY5-T zDe!fp$8xeOLaMRg12aZ1M(m((SsWCN!jFn%sd#*d&M|khVK0t;(myP}!nkn31A%UQ zeK;_mH+R{yo*MHp&%t^T8enzUGqq23*<#V*XHaSL?f_3f8d|F{jtJZ2q?oIP23NT! ziOm89cZ|#%*@--$^6;XZPYUVVB^q!Wz(hdZ8vcaMi;{vyr5dGrbl)Ir%#I8<*`B5& zRF~maj_$3W0ctB5NqRTyhjtDN@NoPTz7zu*e)(xmbs(?yIurVMBC5m%X@nhB9fcRDy2386UvGE)17c`PRalg=kfK+Iw;W0=_ajYn zC;cE*!S2NhM6`)lz>Cp_54X40Fnl1xJi3Ej7C!S|Hsq23hjn)xYmdS;3etU=k+}kX zFhI29_AaE&RJ4SMoH6$os{GV&^?%!=XM9$Mnx69p3D@sZb>jCg`ua4IOmDvx)F0L4 znmo2Ezk0WqH&CG+Ji@Zib0b9Hm2cuDR)Xtw-pRx1_OR4$zu(trGZadfj{46*@z4xT z@I47qD3}jO-uI324U4S3$G=M2a}^%{2?&WWD!5R;LOxAw)r?5;X|}p=@`=`X!glu< zJT#NDXGnZ-8TFI@(sr|yH306zhLAE06f|JJpU5PgN-<4^P%JZ=Wy%V}K9RB8$=QnG zNUcs~v3cG1-V0qD^B>_DEPc2wc47zbzu8$E1j;&~+TXA1!^C?y1=&s;Bf5gxu-^w9 zH$wWLC~1Uj3H?TYDjV5M`noVUZhp!rs2SB>hUaVY-x?X2L%WkRyd`2I2 zJdNUpiwwD~ve`geZK=?Vsnw+WLOZlj2*b66PO{>Ixk4*@$BCI0{ZqpRgOZ!~BnV+rC1YXK&I+=C{v zEEV|r`!_hzTCera+8(Qx8v~<7(H(?x!zvQCEGwu^c#W|xfutq$%1JvBar2*E?kBjj zxO)^cO<$H&+d4HV?|#GPmlE1Qm~1hvV$2@iB=^~2ZZgo2tw$ec##%`TRrL6`#|~T@ ztE`rlQ+5xkjzkhf`DZ_V8-?it>W`(I+w!_?5cDJ&KobX74l?~!);ApX>G7o2h8jsS z(ElLtexC*jTG>rBK;KojJs_pq=bxo^h`st#wW1$hYZ{@}k**a-LIQ2>4rFE-D_n`R*%0AUQ|@5B zlcxox(ZTITqnKv_NNoHRf)Pkmw!YrXQ1>P|xDrdfu;yv_K;W3=Cas3<+yCFmOu6^S z(1%nF$zz~Z_ss{~$|SEMPFe*;3U z@9Sgl$(TX?D;Ms2IJLUg%mWsnAnGT)UFS_2coPQCdR9k-{*Cu~h8)S?r zxvhCI-#H68Z^2KZP?U9Tg=B*m?Vk3f>uaL+-Dm;`OkOtE`LnJM4GK&p2 z>;_^RuDp0&rt7Ya&KDV^&U=5NU5_O)1f%4y>|aVL5{P<|fGUvx#kPN;K=lo9dS408 z>F7wgDVFYOyb`dmnO41$%`h9t9 z8uM4Ipv;xAoVN&o{qer}cJFX6+A^$PzDkX z1ms!q;~NE8l(X8ThaXrQIxILMqpBPSAUvj;5L;=9W77?#6GI` z^40uRop+jx&dm4ICKO{WUJ4>8T2OJ0r&z)%KLMH&jqY~^(Rl8q4Zp0!Dvo6uoazE4 z#=p`|^MX2WJ~vhPjvJQFSo;Ijd!&x}Th4k6U(rLZ+BBaBtxc9s&eyU+8_e}L61jh|F2Lg^uU=?y)H}?u zl7nzlT6V~>^e45?m!yaL=0Y%*j#bp>#*!?8vi``<9no25#=*wchV`*G>|d1EB3Bmp z{43@-?@mTARs~~L;-j(OntP*RDB+GJ2sN4Ch*0i$x)S)ifNm}7aSCq_4l_ajxFQA! zJH}~kIG!F$QT$E^DPyMZFVKJ|n9o>}K}?nxi~j!DgGe7~tEU;c_PLD|fs`R5cW^0! 
zgQFSt%LDEZXYC?;_Iu>fZ08CGAN{e_YQOYP69)`#ycKjgwQ}@ORE6{@*tf($wG=)l zc`tu8cXbQG!juP#z8ITj%G~LHyN>&YG2ixqI;2yP^6zZDisQu^s5^m=-vQZ<3y2tk zP`iY%i6s-+ERRB6_2LI=$HU*X3_@xKHpi20fn;C&9c4kp1dGZOK5Zc0{ku+6PGjE) zux5P^(INM(#ZKK2dB{|>M!-|-m$nMev7tMnaH|943xL3gUR%-skR(*^hs_s;exF^M z-Opv_%;ek#$wxxOCQUf-^-n&aQii{uzv2@lb>0bj%hjcG!`J8aPqr>qP?*l$&mS(` zJC59VmHjAv<3Tv`u}AGDlndm-kf1=51-4L>+F~~2E6qsLrS@2_<5Md}t7G9{;3yl- zpKQysd|tpf2E5+JcfN!r1$$(HUof0V+gndD zet4w~bJl(qqD9mmYr~#|xw0GtT*zyG^}+-9myWW-?BTvQRmjiOHvP(Fp)=^^lNGCl zV-P#vKkchcO_QtqlN+aq>>s$A&F=nhyV)t`>M{Z$`0o1|pwmq#C`3Q4`BhTYDF|^l zmlZ1;uUu~`GKLx0`T|6*SyE<%TWBwYGZ*&yzhAV6L^E&#&y^&9DgEfbXaC~=T~OwDL?Ol;99L%8}(4WAj12O zv5#0bc4pp&!%Icxvge=VI(nFOHl9q;>Ev12bI|65z7Xn?R|#uu=+2-$+RAOM5vLAn zyL84H$pFMa!8x3pdQ7GAZ?Ps1N>wGclILy5V2CsGyOy&lv|12g55&;{c=dv z3e(&APa8lfb);chkdPr1?mwWs1jQ|lCNT7Mw{{8Nuw_5l$`%my@R4p(#qf3c(}T?T)!?W2Fo%!4`7XlkF2~6(AZV?+<1G*RwvO1P z#`+L01}MezjT~pu8YT`W`zy4{G`u2V<6=Q4%4L){_BLL zsM{`G`N?ufW0KG9*MYf@q*jK(J_HqmdiiAuO6MxQe$gl0v@386;?RMF@vTuaKFg)) z-e#IU;uy!hlZeJYiD`9b`6(Ml-ohv6H|t>hvds$Aa;s=?AjE=5HoLDWIC{zzh9RW^ zpn^#aM~zRrreFK(zHSYn3WXQD_4qaFq~7Y@dr@~HprJ}*#@mE-(BEdAHqx6(-J z=V;)4yqVW;enqL~r(nwjw{_ZxP!fVQOS~LxeEdNEKI|IJ$3SO6)c0bsay=+gk?M7* zCnraEVVIonhY9kO1#Xd@r|Y|V2$*$a&!jE2puBP#K$Umpz%=+>O;onCtowPcsZX$| z4RyxdzpH);i=JZTnODlS5#0OlKeECS@|zUd26s~v5*v#x{Pv(hgmE9We+CYrr$B>) z8GLn}r3Ol%jnbzi!S~>CDxrp%N(!im@hfv3CN%h7hVtBabv*X$P7N34cNyMcq5#j=Jk&GRt!rS{j2QC=t6^t#H{}N z$Ay}DmVb!vbV}A!+c$h;B8rhq30Y~ViMDwjv=3;0k7pfN3)wuaiuFx;qZ?Ev%u&`n z!v~>9yvHFotc})DU8`B9=wkdzc7HPc6Tc725b(bhqP^s2Ltc6dH>r}S^| z5u0Y$jm0qbA+~zO%S{ajR3FllD~FM$XE%+cji|ls%4RCb_KN*hG?1?ZEJJVMi$2Oh zi@@4)sHdi3?H4JKhzLoN`Im?wc0*Dc#2l42Wo5crq5K1p=D@up{XHoQT ztPaqvPW%WqYkRH1ke}Sm|Jy~_z0wG4S|Rnc78z-vB0KM836(4jm4pd zf{N{R{aSQwg{-xPi*hvcXWG{gif?nq3$e=iBwf%(?BB4%Ri4^_!jmpC%&VBU2yQDR zVv|-`SHpDTPjY;3-U2aQHsvBPXC}aVaSieI0zfV>=5_{d>+d8XcaJA$o3(1n#eSPu8huYmBy#DL1F``HzDUaNv%l;G}3C zr^ckF;8ZU3QxY5Y1F02T<0!Vlir-^B>4S0UzJ6BSEfaXbGs=~_$~pz+Zk&6mYru-Q z_9m*%F94}a_x*l5Ei?-UGy50XRgbQt(q#cZAfW~VG@HpyEFoKjSTH*C(fTbk$S#; zzyE({NB|6d1HO@_QBVlM`zN|mR3K7+!XZvZL+VpKwH>3Ze(}R@)|`aQ(O~WaF?)uW z(TEY}&hPbi{W=8S`}$$W=8|w>I^QxZ%PYlh^v}&q+w;?E@%M07oprhiVoBI9?0h2A z<=gSjm|WJni`4ok=LzNcekmyYTGXm6z20NhoVOYFX7LZcW!lnBbcxH2!E))-0!XiG zMuG&QWp27K#fd+OUn=xMdt^ICxuG+v=;*#Nyk=WRri_dQ&#b+)h4pzR+70o>f_?t) z*2j0C6aA`23S?arN`1+DyHod}y(2Ff(z7@3%%ShKiC{Veh;KiJ&FmL8^s z8XdQ_Y#eP!E`&5;D~E&YP_;Mu@+-H{5Sssg=l^1RB!J63npO#*#|G@X4o5Pb*AAjAPtV3wFmFvTiJd6 z2NUzQ|LnT;Y*~Ql{Jkb56y$eY`)leZfcTid%HWm1<6x5{e&_-yBOy@j5S(Hyh+^PD z8$5tX}~k1NED%d}}WJtD_ZfC)ONTz+wp@i{0qNVIZ~7 zC>;>^0Tyg)n|f?Rlmy+ENS$88){>6}^N~p@r7&`)VJ@5RZktUyuDYQo^K^WKl>vi5uvCO~I;bO@Is z(U(-H^Q;Jq1wtc>Fxo#hrghS3B+WdYzXR!XzCihFvA~qHsw33KM{pzSn|H^>NRrRZU!7pqqZ zlq%NsQE+RRq`9`ZICy;zLxQrzr$7x^!H6m=aTFb< z!?3kbHC#z?TA+;PEu_3TE@m&GC~MP2G++~7#?=BflXpsjSYWiB07wG;haV0kK_1Ik zuayAgs|mz}Q0h7+?)md6o=8|Kj#TGHXx+sSg#QQop0Rm5>)ylsF>i8savkdyxDtaQ>YCF7yP;RTR5GL zymnZ~On?QpuQ_Q+2f_FxVV0TGk~4TkQ|qK?=UmYo0ZCP1N!Pa1H@-hXE!DMnz2|X; z_*^it#v!`sx!#*af>Y^gGSHUL(4%!mxgNnN!6YU|^o=xMVxu&7{c^dX`Z0zzd9sPz z(bx2Dr^@%Y`V$*d!q`@ip<%=NZBM0%jzy)zGed6XD;wlR;cLay#@WWmD4tKqfbcVX z=CW952e+bNkAzJumr?Q(<=bay^L)U%#FDQNrFAM-+o3>%D|7my3!m9Sj zsApuv6?KOjt3_VlcjD`haA?c3?)%c5wjqvp>U)r?1(hyB_9hWxrm6|MGTxp7Nw0_8bjiX zZ9#CyF`wOYHvS7`B!1^Ts+9X>n|};~2rK5z)*XvTzb|itaX3Ex{VoL`I-V6WTT*$L z)B_VOd@_Q`Eann|?FVaEhij}4n+QdW*$F(WI{oM233OSnCgBNMq91F#R{D-xOhtL> zgYa328S)+ew-#Wb#(ubFU@obajq8=!Xl4tGPUd~?Ai*HJx<#$D$HfCwkVPg&d~rpo z+tbdoC?-($g*23FL##g>8a@*-XX4-Vc6Uusl6DU-$)7D2`e}a4El-)~6^#3v6=H8; 
zh5M)3?$meYsb(KZpAjx;pf>$#eM?auJ5l*d5k^EQ4pH=2CtF!Mtv19acP)f^x23LLN7N!9S}sXH`ODM=-I6 z3;$;rT%Rh+)%BD+uJPlirr^1$HAx4p_g~oKqY)cA3(Qjx&?Y|y-J-&VnOcRsG?gQ3 zy>e?(wqiG;qQ}J|bu>LD~V8z1l;L-hju{{E$OTVrC12 z&+kSX{`FNg`Vzc3r+5r*^2!hUUv>Fg;)_NBZrFLu7@APuR2eP1+-0N;{+OiD%g{B% z(lJik6efXLe7Z%$Qd9~E?JbE-!QedtO5d!*{XwrK(OHATKR7QLyh|6(f~b>Bshwez z846X;V;uKW@QU1iyE8I|yA(c)4b$V*Y6`-5wfZqk>m;g@Q`2v0|3z6oc>lfpdfG(T zKKn=Y)k{*N`E#1xEKHSvsQ*M3*_No<=Q z9@6XXF17%TPc^(A$0VXH?>LN(grLm>SMQF;5l>Ml>`RVW)!lEY3;S2G7wh-#*llq( z$?PM(!k7;tLvE4!;>xraVm;rIUPy6NxR`h#9!h8pN?|^ZZXn`-y1Z8VgtFMuFKLD= zEd7o)$HT?AwSPSDHWiU2r7ov($Lg!*mSk;jD)!9;9VMbzFF!&J9u<- z2D4q`Qtu2s^IyaIcrdd9AB{z=!qba;<-pgZ`9&T!r>?$A*|v6EL<$Af-0AO z%?+<1Qb{Rxrx)AFv)dJ}@`^0np7mE3v}5^l46ySb^a9g+QK?e7iL@bDO0u+R$LS18 zW>WlC!Z&HrGmXsr*#VS&-F1GJ9|w}qv&xO)Es4CeRqrcv3^+2q`k8iDZG^c(addEz zhCgeyg_#xKO#UotuGL*6U( zqDu72$qNTKfPO+;W9t~>xT_7!rGeAK!lG4AiyfSzL`375fY|GBi5jw&rYkwo<)^Bn zu|3z-tGl;YI$v(=AId%q;7j+dEq}SWOp>;o8WAa%S=%<0#ZB_ikXNc3>sM|Ec(C=i z8`oh9kXlASYL@MD#~td!&>VD71yt=Yuv{C?F}&UPn6Fm%OGG$uGxC3hKN z&nvN83aM+r+<0|kVvKFzq`U)PWTAf2#{&98)s_+M4_bdTqYO{cTsoJ=YdE9D^F

+`65lNnCXU=^HRCdU&zjH5FvY7Z}V z^jyA(Dbq`&K-)s$8|E-GJ=lS`6#6bZYX6QCY+;z!C@!8=>XR%n5^X+IDSoc zJaJA1HL8%+$F=y=tzLnojcdN8Sj4oG>Z1>BI57BN0kA!^5wb@T9Q;bKm6w_QUuR$T z>qa*qk2(4G^P}^+N`7xapvbKvU$->u`X;C~e$C2TQ?dFrYlhSG?8N?xfBm+?+J0$# zCocBw#S_Ki%Hz_?qv7&OXpPl`2YJ95VL%|525acqAUWfaArU}v=0yfZ=#1GJSe!3MJr$Q6 z5q0@JMimm;Y!%fNFnf48>ELsT<~;alLsr9AWm~7Oh5P#kWhoo#no$u&vZcy8=8|Xv zgWYnv@n6Q4p-^;XYDflDyJ?%igU_hO2hV7e7ADQsK-$D0ej` zb^gHX-7ZzOym-z}VfGDbFnOksLMq(K`Mx;cB*(`~wkPs#4ykn6FnXsYEAUH$=i|)w z{zJ>}VR^Zt*@S?noq&GOhMN4;R$ouh@p$BP2Hs4_FD*ss>FE*l*vA876P{qxlG2o( zAWyFFJ1xgpXZzLbnrhfbn5Ly5y;i-P@^Cd627%pi+w{}#N0I&JNREIAOR^Jt&-{Af z7E+M`?$gFyHtdw_H!+sqa4%RJ3N@G*`{a3em0ow1N2i5vAuUq~h6mo9=EWlmK6u$L z^~~zdeYQ-z4K^k6^1^(;aGFSat0~NqL!@AJK|`u~QljjMWVDMyO$Bl9AzO?Twrw5u zz3@Z6N@nXLPo5ycC2ZZ~A5e53T?!BiG7m|SWxyheT(4ax$5FNBC^|T8qAU}O#t%%3 z84&E0n3905UCowKlyiS%Hsq18nH>WlQNUcE>uyzASa=JPU__H6aQaTzDZ*^jM%Mzj9U41sk7;DSQtpd+d>||YctqDV&weR@ zp3CL@{B6>XSLfP5^ji#?RmNC3ZeI8l_1_QR=`u0!9y$~h>+m{c+QRt6gz4>Uf4>}f zUvvYsxI;n7s&%@Mffs#}l4osDocD4_>HDQ={?heId@O_->!U)Tm6kGyrp_J6O=DI{ z98fk+L~Ff&M}#^Yxm?p1e(5mh@w6p~@Ku~4g-`vSgOXJua~7`iqq%MxjFKK7aCkq+ zewF6jdb#325YSkzV@mWN@$YUkt8>aKYe~(d(bLr~)M!1OEMBTxr7~a3VBUJi}TJrcviEiYnm^QaTr`%vW^FOcKJ%Hm}<0W zKhpOb{b?hz5vbdHmvwf#o^H8N)&JIyyZE;bdKioP5@JTVRr_6NM+im?v% z#tc^Q@aE%a*2+=Nx^Y&DGQ%6&*D{-og}qe$I2%M!@g{=&VS~NAU<6MBdk?3g09&TI zSDpfyBvZ)OD>ij>^qsJ$kmsfwX8MA6!voG};fQaK`<~k_;d$>#+^~-J0a*E3fx_!U zsq5;es)D7!0kha&dUA8ak2;rUIJsc~*w<|I_G zYa)N4{eQeV@^pD4x>83uRWU>(W93u~^=c3myxR-y{y6VAnI-0vblKYGb6psam66d4 zo+NRlAT8ik<%8XzM!v(z%%2eks3a_;iljSHG%$rA)V7rF_kibCNPe2-M!bmQ-yUiU zPmv8l1jiz=3^6-osaLk@rS#YdG$}YmTnR)yuguWCtn-2%x;%QR;hI9y0$=J7D#lQ3 zLsd#!A%j?q)#r!5KE6U{BYEUw3%+bOBe5ObZOWebE0+UM6kW02Dr{+fu}Fdseev&z z0aWQ)5RJVW4SVZiGHEpIzEVumGn^b}LdCBc@{_UY>8o!V`yD{&7b* zTQ0Mx9!YJEk{L?%@?Cnh>ro;oMHJ6$KqAVXL1&*oe83(Ks5}vfA zW~_;NDrkv&$(88Z)-bklagwKNgHmyNG21+merczEo!!Q%1Y5)YqOg~XE%diz7jQbD z5)PRUxleVtJ`UaTosyZELYkaws3U^f7iU0{$&SUEiYL1Vg{oTo82p=VUv@>+a3Bo0 z7CR%1uRBNqn83cQ^~XyaWw@%?V`jEpHNbic;lcJoQgI;9j8SmW2S-B5m{rOmaq80_ znTP6oqW<1ryiPkYEX88u+F-lpoXtlFME|%3^t&wmaVbCXfZKA$&Ku%_L)uzfp|+*L zGaS4dH^oJqWb*s($%o>?G{@~~g1-3A$ne4Dsb5lj9Fad-*1okkCxeau zvW#%_W{9m=icb@YryCRY6b%z`=VK>|voj1?xhl>vqr!Wl!ob(>yk*&urI7qFJT?GD zqux8ui(i1SlOIPP$38z=NwGVhvEPszHGc3^+xQ8O@#bYgEvPn8KU7WnHt{z8*>3nk z@V6(*q`LF6ZmHX$2#8{$+*E!2zdT91s(Qy*Qm1nbV4n(w5$DruJ^OFOuJ(6d>p{Z-nB^xK#W~=LH7nFj z#qu(z??I7bYMhf!xw80;1*obz3Uq5EAvO5RCEgVcLUzb#X>1b`4~t+@FmN~wUJlv( zE$R;#lz7sC#dEbxKUtxRC^;O&BXD(=WjbW%PZ-RNLAvHlW}u?xUV@ncLg)UHfy9<| z%q<*;(XKZF-|;?;jGP`DS`YA8?`i93+1YHau(ZqtfBWEc%%8Jn04I>o4#Hw^y64uW zFs{airQX}y9TS03jCLoX`l1ar`_e_#)d&SlZ^Hb|3>#h)t+wfRHciJJdbW0UlQ65j zGKGHG>RAyrVI>i*+kG8#80|IGmJy_OUzeegyIvI@STB6X*AwCPiVIzkbPC)GGbQ19 z*E_SA2d3qjgxbB49EA?tVndDAI1K+boRTC(HS8{)-rK8#yRx1?F_u(L{o(BrM^*l1 zdmbB6W-G2g_0|=OwY6Sd8A?>{yg&8+W+p`Qf*_ug!hj*7XQ4l_DI^1d%05Hgxq6OD zijojEQ!^(S`r;!Yzl4T*=1ZnjPPAaL1Ip0iXrh?-U!>`Yg)Xfp%Azw(mEU-vk+`rE zUKAT!9tn^9VGBJ~FA{KtFRIL{Bl7`*j3eV@+;US`g+HSZ15rk{l$u&~4e9aXz(I4I z3=MQPhroNAD6!YCNP;HnSe{aQl|&vjHQJtVr?HyfoNisG^0n~SwW#T2wgq#n=2leI zbuZ$rZ<3LxT7M7}goZiz7`(A&LQdD_%qS0<3aRN@4`H^ytm7^32VicjZ|239xM-Pr z-n0L>OLxQDIyWUEXeVx1(RV2vorcZ8VsJ2y$TAlaU2ezyI*Q`UdH%Zp#@P(7qs0qu z%;Q)Z|A_S+Y5$imz|VAZk95pyh3>Rmy;waZq~=zr0I?2{|G4j@CEnCllJGk425WgY z08!<8KgqTymW7wxcMjV7WF%GX~7S-Y{PZy(%-=x>gMz)EUkA^@Glic zvHNZfh1-_DPFb9KZ5y8^3|frBca}1%yj~$2iqusPWVSs;)YjSbt;Zl6=0<`$>=1Fn z!L4DJ{w>Q`2-6J>LxjY5^f%PtqR-Qmw%~RYJI=gnBTL__ZH_I6Tr~PF?RN#f21XUn zj7gFyRx#tUkYbKhAPPK?(fr#*NUtF96yvl~VD+XMKxEFa=!uzbzwlQs;%1x5}(6?6D2@3fXQ|4b6 zCN#Ce^Chyw+B=(wv}wGW?vad|Z=vQ{-bUk5TP6km 
z;50gnt+C0JMIR>CJx~XZ#s$8s7$$CHDJTLOmVLMo>oyDvX6_U99*zZgG4ive?zr5K znQ`=>&o0TV`DY;%PC4(+HUnQmx9S*h@a^B0{A+xsXxAM0l zLePRvc93{shaCnjxL^=~tf>)b%lztBBRp|fXy=*pC`xb!EXF7_fA|dB5FZkCiUI;XkOY3U;L!*;#&jj?eXnp*nF!-LnUqGt2M?6SWn5W(Q- z0l4v4PQw?A#tH(hG-X1^y1qEik4MirEmqv7{YUV9KdW`7&K?qh#pTM;&16VI(=ilc7d+-6UvO z1UR_DcN{A%W{jWQu@m03kiL~fk`ye=^%M+Y$)yg7x(zts8NFxw>2pOm6=iRKvir(+Q7KZ0J$9!Kw^~SQ2rfy+pG2 zHzFEGhsD_zQ3^@LGeg{Y)dK%6_~Vo*4(>)IlcKhiUV>0!7m<;EtNm+z!> zW7(E=6&h?l)HMPk1J&x^uAS4uCDJf8ip&MQB{7_Yb13xGM*DXnhlQP$DrQa36EK3b!_k4cNyoey{lMgmt&+p=Z zh62jAySC#?ohf>EEr+0hQJJh7?`{|PiOowwFBWPn=U{W<6naTkP~H`2Te{cmzs(&z zQ%Q$6)!XSsrqkV%qCH^hRoLZ?^Ai8l7|#e50@{*pQs*aB!7$|(oY1DxUSQXP_!E3?3Pv7o# z59L!aNCYX2y0q-fkf|nqW&1%`h(#y`W$-f*uNV9PU=jo!EaxOPjW}OjT>}evW1|;? zU=r+@vk9>y{o|+n=vGUtsKJQHty&R2!lggdq&T$F%*5f+xtz*DXE`D5C2BE)`JX~x zb1oC5bSja!f8H1-MBp+dV#dqo%>M9R&KYm}n_LlGpO6%F?MvSucqC2m>)$Tj6ka>C z!u?A&BsMupA}~pQYMTz6?GoBuam0wqHB-oFKel=eDt`_;?}2n+{Flq9D;Sd9su+s> z(7|x=KZ&N?^)YUjGj_NP|K7ykd#ZcehJt@Cc}LMQdQ`d;i8myao?}vQ2A*P7y89-D z+iy4T_p<-xZsed0wMeU-9v!{eE4YMN1XioB!q2Wcn?4LEo2`4TynC=6mgM!?kHTBp zAGdD4WiJZ$0OKiRZ<2xP3PfoazRz6$kF2+V$}(J{Mn5{Fq*DaxPHCi-?hfhhZV>5I zK^mmHySq`krMpu=`aXEhx%d9-UyJ2ZWPQBv6El1E?3t+(tUv>+ZCL9{QKf^0+LT}onaRgPRgVi*%DPe z$wqVbqki%FHSj>MY5jTJg_+X_@YaIzEfvS%+6<;_t3DRP~ zVvA*;P`;{eEn}F+yPV}8@TS{>V=8^tFLMPJ*JE=23Z8Cf6E9{vtc z{n}C|Izzv0(R?^HudAY>ew3rtlI({9Byaw!-jJ;kM@L(O_1K*D~Qa|c?F#$`Lkv>KRLJBe~_!lboIQ4<2G9q zkzj*mQJAF#4YumQp)199iYIwYaU8mQ^|P=ueviLxUK^2Eh~UK%%g((F$}am#R8oI3 zix56ca-ryt8@3_i{F+3-xk~+{6i@oTy0!$uZt!)=2mbU1e*u9fhW%f#3MLyhfV}=j zE1VqLH^gFHRHn}x)X>t5(LW8_<+x4Z$E2);VRpOHZSHv1^+R<;>hO3j%;oXk#boP$ zNw|v?kK!N${Hcaw*7Q_{LQDdb?$uA*UJ83CtEG?EY=-DVWY0VZ|L+^@qB|~Lc_M^o|9;RuB7{YKw;vsxZ;ftOX%|1MZ09uR5vI4#X>FSdB0}>O{!C^l!lfLre&|t{a?Af!K{|$DX2U7n?PxgBO-K4gGza6oXJ`|w zVgd`3oEC`V6L8@k(B9Dz`@*I^dSqoPbn$f?f5kTZ^a~qKS~n1R_~r1QbWLaF{(Pg4 zYt?7vxHw^eqY^-^-)P%zzSu_mZ=ijHsl&a1pT4DdRMb_2&EQHyznzy{3eL&18!GGB@t{QSw&=?F|?hz(4cI@+~>E zZFkzk$bd2++b922WEP5BNxsT8J1o-vtSw<WF=FeHfN3feW)|%ne+}Oyg>69Dv}?58ggE)pNv!Le(@RVsurLn} zZmfp`H$2;@Zhk;U-LlyT6HAzufApr|OXgoh;Auq7k=%S^>!F%|yJarN-i6N-4|meL z1HA^vG);^@S;&CqU}Dn?+q&d;w|LZvZ1u5jO%Rx zM!&DxeDN%|JiC0U@`?79&be2wNW=F;vUgB?^B)KH}H!)Xr+6gD*e>RCdsH_6@|J zCVcncKvZ3Q*5wE2hkappUcc;VrMIq)9B(kbZ6}=44kusY2Mh9JI|^J(!JhVKzS}lShQ%^? z8Ls9@9%%V9)HhE_X;1P-Xk6uwOcdZ1BM}Kg_98zSd~v-gMeOP@QOWL{4wO~3F+Y9l zZ*-!3MFKHT_n4*pxB{oE|E-c#DkHqf*)^9=hIk7 zhfMg{;Ysf6Tl_qzZjRRnvrVh4C@=bDV!vuSjLFgV1Y77!Av5i(DLBLJycg`NSANHI zO9)NB+8OWV;Pvo}r#*(pgZf@%=*TRgU9(Ynuf*SwMj>i~{-qKSG?dp9cU87c#+Q8u z`D_ufu=TGld{RpGG*lq{V7f}|oA@~I=xui3x;X8w{V6*u4jP(zRewj>{bpi3QZ{FJ z-1k{bK)vAheUURABcOs<;vqGd3aI3|8CqRtbP&nhtDkiLh?ki@Q$gt`3+V0L>VX48 zcz2`rw3LIOC34Jj-(cA%-^cqK6D0qg(H}pq??5N>go81)$OyOftJ@R@K)t-)!_3Qh z?b)r)fi#Cq@Ru+XPletql)RcxG26k4&k$uX~)>> zC7+cjGz@tL=F&V`=-*(Py&-Ah=cqwzLkv^FxoR|<*HrC!DmsCGxTG~1in<>2rUe2 zi8!XkGw7-6_`~pOISTnb#5bS0TUKN3^tak;@}9t|ii1Bg$Y(mjV~T;;hQ9#!28ZV| zbsfmOk=A+f!F*8sqmigM!Yl1-1-TT43X~MryYU5)AZk_g3iyq|FiN7%Ih>f)&KD`@ zt5L;f<0Qw!_)n($2x8ORK#PI8gDHMbI}4a%KoQ9%%xDg59Pv2-R002AAtT_yAFr8>F}!y97xCgSPgm`i>ot)A!6@($`7bgv&h`0@{{-9 zi>tup;zFVAZD#Df0&>$dJ~u;aYZS=X%uMieyJ58c_1GT@G=cA5dB6@D?~C|6&zyN~ zMX$G>k{n*Xe^ZUOltV>UB*uzC)F)((0duxSiN`$KP3^ujjm~ftjgV6Ie&DmwJkKR! 
zc`hll4lGe#*x46YS>}>ZT*QXXN(PikF^0Mp!sw>Ld~uM0>TXpr2eJJJxoj@(KnD+O zHZD~A`KOi2WCW-@sG~<*>mm#KCX@EEE>wdkWPev`g*EtoHWrxx4GxzPOpz~BJp`;Q z(FKI>y|;JfnJuC>N;OW|nM9sa|EweTFKPu|xxE~bC&PVhAKenxTg7A~h5Sh)wD~`Z zI3fAX^Q_V9Q`vLoGiq%ssfTbV1*!yZwkwRy5to#sDx4;t<)uts%~q^yiCi(ha8wtGv%Rr(N4H zR#iz8Bt-Sw4+7758ltBwyQg5k`+h%}r^odO=5;&p4MtL{WU5Ed-r74I=Ja_&(DT5< zmYzB-D0|%4MDWA_5G7}RdR7903x)_&NruuFqCQPh-KUy{3p;zGb*O&tOf~JUh5-u< zUlEHel~0Spi{dS0e|Xl3tRL~&Be}VtNR^c{czG2=;@g{0_Sn~pbm0gGXzo#-9A3%e zs`Y5o$Ki``%~CpYucif4(%9oT=9%PFL{q2{`?f;^6)B?|;{EW}98b^FBX2)DO-3H= zk{jdq0XuD3wjn7C>U#_fwov|w^Io2y`?VNDUHn&@$79je$->YQKqT0y{w-)VLRJt? z2!bd@M#0ItQ?C7?E$#ND8NzZs1I`9ki+h>VN(;8&5T}Z0x0O#m)l`F*`qH|LPT}%fGeRDQRxjq z_K*7}fh~SGUyYERoxNxDJm30pzx)wdZl_koD){d`qZ{WZs75Tv4x805 zt_y4H7^@5npj0DYbBBK2bf0;l437f#m>pGIYy8lr=6kmomkjI%8;a2*We`-yNa&|o zu}&3rqqiaCYf8`=k}?B*G6)~5|L~cVVoTzUlafgIIpdQV5mWtm5c`5poNCFqpC-Fo zq8Sn&uJt<%zj{i&sncm{-bnqtmOE)FQIiIF`1t)#*W^DGJEg+5Tj5&WXX0E~J|KuFVrns6JO zP;4T;yj)qqSu#l@AxTddy`2{FZ#1=zHdJ|iBa`=QFx9N#Wvd@nXd@~5*!tY#`#(i_ zRkUTb0wcqu0%_?3fJ#o6C)Od=jGKSu5=H9)TnWpg^UF;)ceB&aksA!%>m7ZqR=2kw zK*&GnwAKZ8zs=SDiQ%KRwW_M>8fc9#0fbT#@83HdFY<)0*OIIZ4N0|JEm)ab4E{L( zfesFOH^&z+&dq!l z36e~xGJz4;n9dO&s~OZ9!U72Q4`%AN&c@7lCd6)zav*wrZMnqxy!vMwf5cnHpXB-+ z;8D^SPPyyUKDL_)@jSD_n*Rm_*0(>{G}WyrO-)a)JrBjK1mja%`r_olVv8VB=ztpUN?BitHH#9}&c zATi3KGGN0DFr=ew_t#(wE;JHJ7xFX4&z_*6psG2yC1J;aW>JE>Hx~k&gZ94mno~mj zmp*@C-Q0+#C+p}mJnDER5tFtQg}31!z258JsE~+6U~eFsL3}&a$4p79X?q}pyZgK# z)tvNr&BDm6w^Q1w9I|xOJhodp$_t;qwdUl$hSf54lwgER27xYY`eNX5MkSCdA|ku4 zv2qaMO3_}t_7>n_Ri$U=EnN~d29@;&`5z>`zdJbu z>fF$UH5*9_flN&8Od`DsXKMr0e?8Cd#7VoW`CpkY2Lp-aFS!~cr6>{a06eDU+HiAP zX+;_+3-%-HwIJlv^FzVnFIpJN5|#f`I-F&Xw_c( zWmXj_P)Ns-3>ss8^ge?Fmf>YY@9M=(1(?yf=y_0jdV@SmP&x*^%0R^m(jb^xf6eK)oetoCuTh&lyylq6QSCVPbPpFNPtha{vx zJ|GIKMChv!_X=6#lleeX#rm9mRCf53V5KboH;n*s)q)%HL*-9XcAGX+;{64? znA>KP=a&!hk{7BjIPvtm5OB}9NhcRGkp}=Hb2H2k$=^v41qX@q(`ITOnKTe{Nnje$ zy6Jl(ZVXCFzqoW<#m8lA+m7P{izZ?B5Uzs<77bgu%J<1z!4_IyNnT%uxKrN{7V8|} znaYccW*qLd;%BKmr~_!mxhVyNANBmyTzdKb$fTv$s>ox#NE!G;wS<9Erjy?qk}G;$ zW~klS13Mero4I%R@eGu(&W=|R8nBOb|8VN2zGz;k19~Awde_%Y&!T2Kz0{+)UfLXB zAGam3);FuRKjd}8^T&{(n77Me z8aHC1F|G+hL&Qe4$%;ZP!9o`;)VCg;bRdj^Ynp|um;LHjI)J%WLb|nB4BE?F8vBp* zjJxZbw>i3jxa+Hj3DY@EV~8azlfSq@HG#O@=J_{HOuK^R$TXYZK)W{ViypnTio<`3 zq_;mpgBgSifqGrk^Ro8COGedOa&-`U6G&4>#yaUg^$7q!h(lJ@#S)*|v6dbiPibqZ zGwq&vK=7B?svjvyepkTN;<;k-BY!|a@_gYVh||tK{6?dbDgBm#-9_3IBV0qXWAAi? 
[GIT binary patch data omitted: the base85-encoded contents of the new image file docs/images/ex2-2_ddpg_bug_pytorch.png are not reproduced here.]

[SVG figure diffs omitted: the patch also adds new benchmark plot files under docs/images/plots/, one performance figure per environment (ant, halfcheetah, hopper, swimmer, walker2d) for each of the ddpg, ppo, pyt, sac, td3, tf1, and vpg plot sets. These files contain only SVG vector-graphics markup and are referenced by the documentation pages below.]
diff --git a/docs/spinningup/bench.rst b/docs/spinningup/bench.rst
index 596b49559..be0152b6b 100644
--- a/docs/spinningup/bench.rst
+++ b/docs/spinningup/bench.rst
@@ -11,54 +11,143 @@ We benchmarked the Spinning Up algorithm implementations in five environments fr
 Performance in Each Environment
 ===============================
 
-HalfCheetah
------------
+HalfCheetah: PyTorch Versions
+-----------------------------
 
-.. figure:: ../images/bench/bench_halfcheetah.svg
+.. figure:: ../images/plots/pyt/pytorch_halfcheetah_performance.svg
     :align: center
 
-    3M timestep benchmark for HalfCheetah-v2.
+    3M timestep benchmark for HalfCheetah-v3 using **PyTorch** implementations.
 
-Hopper
-------
+HalfCheetah: Tensorflow Versions
+--------------------------------
 
-.. figure:: ../images/bench/bench_hopper.svg
+.. figure:: ../images/plots/tf1/tensorflow_halfcheetah_performance.svg
     :align: center
 
-    3M timestep benchmark for Hopper-v2.
+    3M timestep benchmark for HalfCheetah-v3 using **Tensorflow** implementations.
 
-Walker
-------
-.. figure:: ../images/bench/bench_walker.svg
+
+
+Hopper: PyTorch Versions
+------------------------
+
+.. 
figure:: ../images/plots/pyt/pytorch_hopper_performance.svg + :align: center + + 3M timestep benchmark for Hopper-v3 using **PyTorch** implementations. + + +Hopper: Tensorflow Versions +--------------------------- + +.. figure:: ../images/plots/tf1/tensorflow_hopper_performance.svg + :align: center + + 3M timestep benchmark for Hopper-v3 using **Tensorflow** implementations. + + + + +Walker2d: PyTorch Versions +-------------------------- + +.. figure:: ../images/plots/pyt/pytorch_walker2d_performance.svg :align: center - 3M timestep benchmark for Walker2d-v2. + 3M timestep benchmark for Walker2d-v3 using **PyTorch** implementations. + -Swimmer -------- -.. figure:: ../images/bench/bench_swim.svg +Walker2d: Tensorflow Versions +----------------------------- + +.. figure:: ../images/plots/tf1/tensorflow_walker2d_performance.svg :align: center - 3M timestep benchmark for Swimmer-v2. + 3M timestep benchmark for Walker2d-v3 using **Tensorflow** implementations. + -Ant ---- -.. figure:: ../images/bench/bench_ant.svg + +Swimmer: PyTorch Versions +------------------------- + +.. figure:: ../images/plots/pyt/pytorch_swimmer_performance.svg :align: center - 3M timestep benchmark for Ant-v2. + 3M timestep benchmark for Swimmer-v3 using **PyTorch** implementations. + + +Swimmer: Tensorflow Versions +---------------------------- + +.. figure:: ../images/plots/tf1/tensorflow_swimmer_performance.svg + :align: center + + 3M timestep benchmark for Swimmer-v3 using **Tensorflow** implementations. + + + +Ant: PyTorch Versions +------------------------ + +.. figure:: ../images/plots/pyt/pytorch_ant_performance.svg + :align: center + + 3M timestep benchmark for Ant-v3 using **PyTorch** implementations. + + +Ant: Tensorflow Versions +--------------------------- + +.. figure:: ../images/plots/tf1/tensorflow_ant_performance.svg + :align: center + + 3M timestep benchmark for Ant-v3 using **Tensorflow** implementations. + Experiment Details ================== -**Random seeds.** The on-policy algorithms (VPG, TPRO, PPO) were run for 3 random seeds each, and the off-policy algorithms (DDPG, TD3, SAC) were run for 10 random seeds each. Graphs show the average (solid line) and std dev (shaded) of performance over random seed over the course of training. +**Random seeds.** All experiments were run for 10 random seeds each. Graphs show the average (solid line) and std dev (shaded) of performance over random seed over the course of training. **Performance metric.** Performance for the on-policy algorithms is measured as the average trajectory return across the batch collected at each epoch. Performance for the off-policy algorithms is measured once every 10,000 steps by running the deterministic policy (or, in the case of SAC, the mean policy) without action noise for ten trajectories, and reporting the average return over those test trajectories. -**Network architectures.** The on-policy algorithms use networks of size (64, 32) with tanh units for both the policy and the value function. The off-policy algorithms use networks of size (400, 300) with relu units. +**Network architectures.** The on-policy algorithms use networks of size (64, 32) with tanh units for both the policy and the value function. The off-policy algorithms use networks of size (256, 256) with relu units. **Batch size.** The on-policy algorithms collected 4000 steps of agent-environment interaction per batch update. The off-policy algorithms used minibatches of size 100 at each gradient descent step. 
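For concreteness, here is a minimal sketch of the off-policy evaluation protocol described above, assuming a Gym-style ``env`` and a deterministic ``policy`` callable (hypothetical stand-ins for illustration, not the actual Spinning Up test code):

.. code-block:: python

    import numpy as np

    def eval_deterministic_policy(env, policy, num_test_episodes=10):
        """Average return of the noise-free policy over test trajectories."""
        returns = []
        for _ in range(num_test_episodes):
            obs, done, ep_ret = env.reset(), False, 0.0
            while not done:
                # Deterministic (or mean) action, with no exploration noise at test time.
                obs, rew, done, _ = env.step(policy(obs))
                ep_ret += rew
            returns.append(ep_ret)
        return np.mean(returns)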
-All other hyperparameters are left at default settings for the Spinning Up implementations. See algorithm pages for details. \ No newline at end of file +All other hyperparameters are left at default settings for the Spinning Up implementations. See algorithm pages for details. + +Learning curves are smoothed by averaging over a window of 11 epochs. + +.. admonition:: You Should Know + + By comparison to the literature, the Spinning Up implementations of DDPG, TD3, and SAC are roughly at-parity with the best reported results for these algorithms. As a result, you can use the Spinning Up implementations of these algorithms for research purposes. + + The Spinning Up implementations of VPG, TRPO, and PPO are overall a bit weaker than the best reported results for these algorithms. This is due to the absence of some standard tricks (such as observation normalization and normalized value regression targets) from our implementations. For research comparisons, you should use the implementations of TRPO or PPO from `OpenAI Baselines`_. + +.. _`OpenAI Baselines`: https://github.com/openai/baselines + + +PyTorch vs Tensorflow +===================== + + +We provide graphs for head-to-head comparisons between the PyTorch and Tensorflow implementations of each algorithm at the following pages: + +* `VPG Head-to-Head`_ + +* `PPO Head-to-Head`_ + +* `DDPG Head-to-Head`_ + +* `TD3 Head-to-Head`_ + +* `SAC Head-to-Head`_ + +.. _`VPG Head-to-Head`: ../spinningup/bench_vpg.html +.. _`PPO Head-to-Head`: ../spinningup/bench_ppo.html +.. _`DDPG Head-to-Head`: ../spinningup/bench_ddpg.html +.. _`TD3 Head-to-Head`: ../spinningup/bench_td3.html +.. _`SAC Head-to-Head`: ../spinningup/bench_sac.html \ No newline at end of file diff --git a/docs/spinningup/bench_ddpg.rst b/docs/spinningup/bench_ddpg.rst new file mode 100644 index 000000000..c87149d4c --- /dev/null +++ b/docs/spinningup/bench_ddpg.rst @@ -0,0 +1,35 @@ +DDPG Head-to-Head +================= + +HalfCheetah +----------- + +.. figure:: ../images/plots/ddpg/ddpg_halfcheetah_performance.svg + :align: center + + +Hopper +------ + +.. figure:: ../images/plots/ddpg/ddpg_hopper_performance.svg + :align: center + + +Walker2d +-------- + +.. figure:: ../images/plots/ddpg/ddpg_walker2d_performance.svg + :align: center + +Swimmer +------- + +.. figure:: ../images/plots/ddpg/ddpg_swimmer_performance.svg + :align: center + + +Ant +--- + +.. figure:: ../images/plots/ddpg/ddpg_ant_performance.svg + :align: center \ No newline at end of file diff --git a/docs/spinningup/bench_ppo.rst b/docs/spinningup/bench_ppo.rst new file mode 100644 index 000000000..c51809dba --- /dev/null +++ b/docs/spinningup/bench_ppo.rst @@ -0,0 +1,35 @@ +Proximal Policy Optimization Head-to-Head +========================================= + +HalfCheetah +----------- + +.. figure:: ../images/plots/ppo/ppo_halfcheetah_performance.svg + :align: center + + +Hopper +------ + +.. figure:: ../images/plots/ppo/ppo_hopper_performance.svg + :align: center + + +Walker2d +-------- + +.. figure:: ../images/plots/ppo/ppo_walker2d_performance.svg + :align: center + +Swimmer +------- + +.. figure:: ../images/plots/ppo/ppo_swimmer_performance.svg + :align: center + + +Ant +--- + +.. 
figure:: ../images/plots/ppo/ppo_ant_performance.svg + :align: center \ No newline at end of file diff --git a/docs/spinningup/bench_sac.rst b/docs/spinningup/bench_sac.rst new file mode 100644 index 000000000..3d1af49c9 --- /dev/null +++ b/docs/spinningup/bench_sac.rst @@ -0,0 +1,35 @@ +SAC Head-to-Head +================= + +HalfCheetah +----------- + +.. figure:: ../images/plots/sac/sac_halfcheetah_performance.svg + :align: center + + +Hopper +------ + +.. figure:: ../images/plots/sac/sac_hopper_performance.svg + :align: center + + +Walker2d +-------- + +.. figure:: ../images/plots/sac/sac_walker2d_performance.svg + :align: center + +Swimmer +------- + +.. figure:: ../images/plots/sac/sac_swimmer_performance.svg + :align: center + + +Ant +--- + +.. figure:: ../images/plots/sac/sac_ant_performance.svg + :align: center \ No newline at end of file diff --git a/docs/spinningup/bench_td3.rst b/docs/spinningup/bench_td3.rst new file mode 100644 index 000000000..aed7406b9 --- /dev/null +++ b/docs/spinningup/bench_td3.rst @@ -0,0 +1,35 @@ +TD3 Head-to-Head +================= + +HalfCheetah +----------- + +.. figure:: ../images/plots/td3/td3_halfcheetah_performance.svg + :align: center + + +Hopper +------ + +.. figure:: ../images/plots/td3/td3_hopper_performance.svg + :align: center + + +Walker2d +-------- + +.. figure:: ../images/plots/td3/td3_walker2d_performance.svg + :align: center + +Swimmer +------- + +.. figure:: ../images/plots/td3/td3_swimmer_performance.svg + :align: center + + +Ant +--- + +.. figure:: ../images/plots/td3/td3_ant_performance.svg + :align: center \ No newline at end of file diff --git a/docs/spinningup/bench_vpg.rst b/docs/spinningup/bench_vpg.rst new file mode 100644 index 000000000..8d2dfe48c --- /dev/null +++ b/docs/spinningup/bench_vpg.rst @@ -0,0 +1,35 @@ +Vanilla Policy Gradients Head-to-Head +===================================== + +HalfCheetah +----------- + +.. figure:: ../images/plots/vpg/vpg_halfcheetah_performance.svg + :align: center + + +Hopper +------ + +.. figure:: ../images/plots/vpg/vpg_hopper_performance.svg + :align: center + + +Walker2d +-------- + +.. figure:: ../images/plots/vpg/vpg_walker2d_performance.svg + :align: center + +Swimmer +------- + +.. figure:: ../images/plots/vpg/vpg_swimmer_performance.svg + :align: center + + +Ant +--- + +.. figure:: ../images/plots/vpg/vpg_ant_performance.svg + :align: center \ No newline at end of file diff --git a/docs/spinningup/exercise2_2_soln.rst b/docs/spinningup/exercise2_2_soln.rst index e194eda6a..a0090d77b 100644 --- a/docs/spinningup/exercise2_2_soln.rst +++ b/docs/spinningup/exercise2_2_soln.rst @@ -8,8 +8,13 @@ Solution to Exercise 2.2 Learning curves for DDPG in HalfCheetah-v2 for bugged and non-bugged actor-critic implementations, averaged over three random seeds. -The Bug in the Code -=================== +.. admonition:: You Should Know + + This page will give the solution primarily in terms of a detailed analysis of the Tensorflow version of this exercise. However, the problem in the PyTorch version is basically the same and so is its solution. + + +The Bug in the Code: Tensorflow Version +======================================= The only difference between the correct actor-critic code, @@ -54,8 +59,48 @@ and the bugged actor-critic code, is the tensor shape for the Q-functions. The correct version squeezes ouputs so that they have shape ``[batch size]``, whereas the bugged version doesn't, resulting in Q-functions with shape ``[batch size, 1]``. 
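A quick way to see why that extra dimension matters (a standalone illustration, not part of the exercise files): subtracting a ``[batch size, 1]`` tensor from a ``[batch size]`` Bellman backup broadcasts to a ``[batch size, batch size]`` matrix, which the sections below walk through in detail.

.. code-block:: python

    import torch

    n = 4
    backup = torch.randn(n)       # Bellman backup target, shape [batch size]
    q_ok = torch.randn(n)         # squeezed Q-values, shape [batch size]
    q_bug = torch.randn(n, 1)     # unsqueezed Q-values, shape [batch size, 1]

    print((backup - q_ok).shape)       # torch.Size([4]): elementwise, as intended
    print((backup - q_bug).shape)      # torch.Size([4, 4]): silent all-pairs broadcast
    print(((backup - q_bug)**2).mean())  # averages over n^2 pairs instead of n errors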
-How it Gums Up the Works -======================== +The Bug in the Code: PyTorch Version +==================================== + +In the PyTorch version of the exercise, the difference is virtually the same. The correct actor-critic code computes a forward pass on the Q-function that squeezes its output: + + +.. code-block:: python + :emphasize-lines: 12 + + """ + Correct Q-Function + """ + class MLPQFunction(nn.Module): + + def __init__(self, obs_dim, act_dim, hidden_sizes, activation): + super().__init__() + self.q = mlp([obs_dim + act_dim] + list(hidden_sizes) + [1], activation) + + def forward(self, obs, act): + q = self.q(torch.cat([obs, act], dim=-1)) + return torch.squeeze(q, -1) # Critical to ensure q has right shape. + + +while the bugged version does not: + +.. code-block:: python + :emphasize-lines: 11 + + """ + Bugged Q-Function + """ + class BuggedMLPQFunction(nn.Module): + + def __init__(self, obs_dim, act_dim, hidden_sizes, activation): + super().__init__() + self.q = mlp([obs_dim + act_dim] + list(hidden_sizes) + [1], activation) + + def forward(self, obs, act): + return self.q(torch.cat([obs, act], dim=-1)) + +How it Gums Up the Works: Tensorflow Version +============================================ Consider the excerpt from the part in the code that builds the DDPG computation graph: @@ -143,4 +188,20 @@ and a messed up loss function \frac{1}{n^2} \sum_{i=1}^n \sum_{j=1}^n (q_j - z_{ij})^2. -If you leave this to run in HalfCheetah long enough, you'll actually see some non-trivial learning process, because weird details specific to this environment partly cancel out the errors. But almost everywhere else, it fails completely. \ No newline at end of file +If you leave this to run in HalfCheetah long enough, you'll actually see some non-trivial learning process, because weird details specific to this environment partly cancel out the errors. But almost everywhere else, it fails completely. + + +How it Gums Up the Works: PyTorch Version +========================================= + +Exactly the same broadcasting shenanigans as in the Tensorflow version. Check out `this note`_ in the PyTorch documentation about it. + + +.. figure:: ../images/ex2-2_ddpg_bug_pytorch.png + :align: center + + Learning curves for DDPG in HalfCheetah-v2 for bugged and non-bugged actor-critic implementations using PyTorch, averaged over three random seeds. + + + +.. _`this note`: https://pytorch.org/docs/stable/notes/broadcasting.html#backwards-compatibility \ No newline at end of file diff --git a/docs/spinningup/exercises.rst b/docs/spinningup/exercises.rst index 937391805..1fd94af57 100644 --- a/docs/spinningup/exercises.rst +++ b/docs/spinningup/exercises.rst @@ -11,11 +11,20 @@ Problem Set 1: Basics of Implementation .. admonition:: Exercise 1.1: Gaussian Log-Likelihood - **Path to Exercise.** ``spinup/exercises/problem_set_1/exercise1_1.py`` + **Path to Exercise:** - **Path to Solution.** ``spinup/exercises/problem_set_1_solutions/exercise1_1_soln.py`` + * PyTorch version: ``spinup/exercises/pytorch/problem_set_1/exercise1_1.py`` + + * Tensorflow version: ``spinup/exercises/tf1/problem_set_1/exercise1_1.py`` - **Instructions.** Write a function which takes in Tensorflow symbols for the means and log stds of a batch of diagonal Gaussian distributions, along with a Tensorflow placeholder for (previously-generated) samples from those distributions, and returns a Tensorflow symbol for computing the log likelihoods of those samples. 
+ **Path to Solution:** + + * PyTorch version: ``spinup/exercises/pytorch/problem_set_1_solutions/exercise1_1_soln.py`` + + * Tensorflow version: ``spinup/exercises/tf1/problem_set_1_solutions/exercise1_1_soln.py`` + + + **Instructions.** Write a function that takes in the means and log stds of a batch of diagonal Gaussian distributions, along with (previously-generated) samples from those distributions, and returns the log likelihoods of those samples. (In the Tensorflow version, you will write a function that creates computation graph operations to do this; in the PyTorch version, you will directly operate on given Tensors.) You may find it useful to review the formula given in `this section of the RL introduction`_. @@ -28,9 +37,17 @@ Problem Set 1: Basics of Implementation .. admonition:: Exercise 1.2: Policy for PPO - **Path to Exercise.** ``spinup/exercises/problem_set_1/exercise1_2.py`` + **Path to Exercise:** + + * PyTorch version: ``spinup/exercises/pytorch/problem_set_1/exercise1_2.py`` + + * Tensorflow version: ``spinup/exercises/tf1/problem_set_1/exercise1_2.py`` + + **Path to Solution:** - **Path to Solution.** ``spinup/exercises/problem_set_1_solutions/exercise1_2_soln.py`` + * PyTorch version: ``spinup/exercises/pytorch/problem_set_1_solutions/exercise1_2_soln.py`` + + * Tensorflow version: ``spinup/exercises/tf1/problem_set_1_solutions/exercise1_2_soln.py`` **Instructions.** Implement an MLP diagonal Gaussian policy for PPO. @@ -41,13 +58,21 @@ Problem Set 1: Basics of Implementation .. admonition:: Exercise 1.3: Computation Graph for TD3 - **Path to Exercise.** ``spinup/exercises/problem_set_1/exercise1_3.py`` + **Path to Exercise.** + + * PyTorch version: ``spinup/exercises/pytorch/problem_set_1/exercise1_3.py`` + + * Tensorflow version: ``spinup/exercises/tf1/problem_set_1/exercise1_3.py`` + + **Path to Solution.** - **Path to Solution.** ``spinup/algos/td3/td3.py`` + * PyTorch version: ``spinup/algos/pytorch/td3/td3.py`` - **Instructions.** Implement the core computation graph for the TD3 algorithm. + * Tensorflow version: ``spinup/algos/tf1/td3/td3.py`` - As starter code, you are given the entirety of the TD3 algorithm except for the computation graph. Find "YOUR CODE HERE" to begin. + **Instructions.** Implement the main mathematical logic for the TD3 algorithm. + + As starter code, you are given the entirety of the TD3 algorithm except for the main mathematical logic (essentially, the loss functions and intermediate calculations needed for them). Find "YOUR CODE HERE" to begin. You may find it useful to review the pseudocode in our `page on TD3`_. @@ -83,13 +108,17 @@ Problem Set 2: Algorithm Failure Modes .. admonition:: Exercise 2.2: Silent Bug in DDPG - **Path to Exercise.** ``spinup/exercises/problem_set_2/exercise2_2.py`` + **Path to Exercise.** + + * PyTorch version: ``spinup/exercises/pytorch/problem_set_2/exercise2_2.py`` + + * Tensorflow version: ``spinup/exercises/tf1/problem_set_2/exercise2_2.py`` **Path to Solution.** `Solution available here. <../spinningup/exercise2_2_soln.html>`_ The hardest part of writing RL code is dealing with bugs, because failures are frequently silent. The code will appear to run correctly, but the agent's performance will degrade relative to a bug-free implementation---sometimes to the extent that it never learns anything. - In this exercise, you will observe a bug in vivo and compare results against correct code. + In this exercise, you will observe a bug in vivo and compare results against correct code. 
The bug is the same (conceptually, if not in exact implementation) for both the PyTorch and Tensorflow versions of this exercise. **Instructions.** Run ``exercise2_2.py``, which will launch DDPG experiments with and without a bug. The non-bugged version runs the default Spinning Up implementation of DDPG, using a default method for creating the actor and critic networks. The bugged version runs the same DDPG code, except uses a bugged method for creating the networks. @@ -97,7 +126,7 @@ Problem Set 2: Algorithm Failure Modes Without referencing the correct actor-critic code (which is to say---don't look in DDPG's ``core.py`` file), try to figure out what the bug is and explain how it breaks things. - **Hint.** To figure out what's going wrong, think about how the DDPG code implements the DDPG computation graph. Specifically, look at this excerpt: + **Hint.** To figure out what's going wrong, think about how the DDPG code implements the DDPG computation graph. For the Tensorflow version, look at this excerpt: .. code-block:: python diff --git a/docs/spinningup/extra_tf_pg_implementation.rst b/docs/spinningup/extra_tf_pg_implementation.rst new file mode 100644 index 000000000..d2bd7769f --- /dev/null +++ b/docs/spinningup/extra_tf_pg_implementation.rst @@ -0,0 +1,181 @@ +================================================================== +Extra Material: Tensorflow Policy Gradient Implementation Examples +================================================================== + + +Implementing the Simplest Policy Gradient +========================================= + +We give a short Tensorflow implementation of this simple version of the policy gradient algorithm in ``spinup/examples/tf1/pg_math/1_simple_pg.py``. (It can also be viewed `on github `_.) It is only 122 lines long, so we highly recommend reading through it in depth. While we won't go through the entirety of the code here, we'll highlight and explain a few important pieces. + +**1. Making the Policy Network.** + +.. code-block:: python + :linenos: + :lineno-start: 25 + + # make core of policy network + obs_ph = tf.placeholder(shape=(None, obs_dim), dtype=tf.float32) + logits = mlp(obs_ph, sizes=hidden_sizes+[n_acts]) + + # make action selection op (outputs int actions, sampled from policy) + actions = tf.squeeze(tf.multinomial(logits=logits,num_samples=1), axis=1) + +This block builds a feedforward neural network categorical policy. (See the `Stochastic Policies`_ section in Part 1 for a refresher.) The ``logits`` tensor can be used to construct log-probabilities and probabilities for actions, and the ``actions`` tensor samples actions based on the probabilities implied by ``logits``. + +.. _`Stochastic Policies`: ../spinningup/rl_intro.html#stochastic-policies + +**2. Making the Loss Function.** + +.. code-block:: python + :linenos: + :lineno-start: 32 + + # make loss function whose gradient, for the right data, is policy gradient + weights_ph = tf.placeholder(shape=(None,), dtype=tf.float32) + act_ph = tf.placeholder(shape=(None,), dtype=tf.int32) + action_masks = tf.one_hot(act_ph, n_acts) + log_probs = tf.reduce_sum(action_masks * tf.nn.log_softmax(logits), axis=1) + loss = -tf.reduce_mean(weights_ph * log_probs) + + +In this block, we build a "loss" function for the policy gradient algorithm. When the right data is plugged in, the gradient of this loss is equal to the policy gradient. 
The right data means a set of (state, action, weight) tuples collected while acting according to the current policy, where the weight for a state-action pair is the return from the episode to which it belongs. (Although as we will show in later subsections, there are other values you can plug in for the weight which also work correctly.) + + +.. admonition:: You Should Know + + Even though we describe this as a loss function, it is **not** a loss function in the typical sense from supervised learning. There are two main differences from standard loss functions. + + **1. The data distribution depends on the parameters.** A loss function is usually defined on a fixed data distribution which is independent of the parameters we aim to optimize. Not so here, where the data must be sampled on the most recent policy. + + **2. It doesn't measure performance.** A loss function usually evaluates the performance metric that we care about. Here, we care about expected return, :math:`J(\pi_{\theta})`, but our "loss" function does not approximate this at all, even in expectation. This "loss" function is only useful to us because, when evaluated at the current parameters, with data generated by the current parameters, it has the negative gradient of performance. + + But after that first step of gradient descent, there is no more connection to performance. This means that minimizing this "loss" function, for a given batch of data, has *no* guarantee whatsoever of improving expected return. You can send this loss to :math:`-\infty` and policy performance could crater; in fact, it usually will. Sometimes a deep RL researcher might describe this outcome as the policy "overfitting" to a batch of data. This is descriptive, but should not be taken literally because it does not refer to generalization error. + + We raise this point because it is common for ML practitioners to interpret a loss function as a useful signal during training---"if the loss goes down, all is well." In policy gradients, this intuition is wrong, and you should only care about average return. The loss function means nothing. + + + + +.. admonition:: You Should Know + + The approach used here to make the ``log_probs`` tensor---creating an action mask, and using it to select out particular log probabilities---*only* works for categorical policies. It does not work in general. + + + +**3. Running One Epoch of Training.** + +.. code-block:: python + :linenos: + :lineno-start: 45 + + # for training policy + def train_one_epoch(): + # make some empty lists for logging. 
+ batch_obs = [] # for observations + batch_acts = [] # for actions + batch_weights = [] # for R(tau) weighting in policy gradient + batch_rets = [] # for measuring episode returns + batch_lens = [] # for measuring episode lengths + + # reset episode-specific variables + obs = env.reset() # first obs comes from starting distribution + done = False # signal from environment that episode is over + ep_rews = [] # list for rewards accrued throughout ep + + # render first episode of each epoch + finished_rendering_this_epoch = False + + # collect experience by acting in the environment with current policy + while True: + + # rendering + if not(finished_rendering_this_epoch): + env.render() + + # save obs + batch_obs.append(obs.copy()) + + # act in the environment + act = sess.run(actions, {obs_ph: obs.reshape(1,-1)})[0] + obs, rew, done, _ = env.step(act) + + # save action, reward + batch_acts.append(act) + ep_rews.append(rew) + + if done: + # if episode is over, record info about episode + ep_ret, ep_len = sum(ep_rews), len(ep_rews) + batch_rets.append(ep_ret) + batch_lens.append(ep_len) + + # the weight for each logprob(a|s) is R(tau) + batch_weights += [ep_ret] * ep_len + + # reset episode-specific variables + obs, done, ep_rews = env.reset(), False, [] + + # won't render again this epoch + finished_rendering_this_epoch = True + + # end experience loop if we have enough of it + if len(batch_obs) > batch_size: + break + + # take a single policy gradient update step + batch_loss, _ = sess.run([loss, train_op], + feed_dict={ + obs_ph: np.array(batch_obs), + act_ph: np.array(batch_acts), + weights_ph: np.array(batch_weights) + }) + return batch_loss, batch_rets, batch_lens + +The ``train_one_epoch()`` function runs one "epoch" of policy gradient, which we define to be + +1) the experience collection step (L62-97), where the agent acts for some number of episodes in the environment using the most recent policy, followed by + +2) a single policy gradient update step (L99-105). + +The main loop of the algorithm just repeatedly calls ``train_one_epoch()``. + + + + +Implementing Reward-to-Go Policy Gradient +========================================= + +We give a short Tensorflow implementation of the reward-to-go policy gradient in ``spinup/examples/tf1/pg_math/2_rtg_pg.py``. (It can also be viewed `on github `_.) + +The only thing that has changed from ``1_simple_pg.py`` is that we now use different weights in the loss function. The code modification is very slight: we add a new function, and change two other lines. The new function is: + +.. code-block:: python + :linenos: + :lineno-start: 12 + + def reward_to_go(rews): + n = len(rews) + rtgs = np.zeros_like(rews) + for i in reversed(range(n)): + rtgs[i] = rews[i] + (rtgs[i+1] if i+1 < n else 0) + return rtgs + + +And then we tweak the old L86-87 from: + +.. code-block:: python + :linenos: + :lineno-start: 86 + + # the weight for each logprob(a|s) is R(tau) + batch_weights += [ep_ret] * ep_len + +to: + +.. 
code-block:: python + :linenos: + :lineno-start: 93 + + # the weight for each logprob(a_t|s_t) is reward-to-go from t + batch_weights += list(reward_to_go(ep_rews)) diff --git a/docs/spinningup/rl_intro.rst b/docs/spinningup/rl_intro.rst index 2c159ca09..604bc6584 100644 --- a/docs/spinningup/rl_intro.rst +++ b/docs/spinningup/rl_intro.rst @@ -122,15 +122,29 @@ We often denote the parameters of such a policy by :math:`\theta` or :math:`\phi Deterministic Policies ^^^^^^^^^^^^^^^^^^^^^^ -**Example: Deterministic Policies.** Here is a code snippet for building a simple deterministic policy for a continuous action space in Tensorflow: +**Example: Deterministic Policies.** Here is a code snippet for building a simple deterministic policy for a continuous action space in PyTorch, using the ``torch.nn`` package: .. code-block:: python - obs = tf.placeholder(shape=(None, obs_dim), dtype=tf.float32) - net = mlp(obs, hidden_dims=(64,64), activation=tf.tanh) - actions = tf.layers.dense(net, units=act_dim, activation=None) + pi_net = nn.Sequential( + nn.Linear(obs_dim, 64), + nn.Tanh(), + nn.Linear(64, 64), + nn.Tanh(), + nn.Linear(64, act_dim) + ) + +This builds a multi-layer perceptron (MLP) network with two hidden layers of size 64 and :math:`\tanh` activation functions. If ``obs`` is a Numpy array containing a batch of observations, ``pi_net`` can be used to obtain a batch of actions as follows: + +.. code-block:: python + + obs_tensor = torch.as_tensor(obs, dtype=torch.float32) + actions = pi_net(obs_tensor) + +.. admonition:: You Should Know + + Don't worry about it if this neural network stuff is unfamiliar to you---this tutorial will focus on RL, and not on the neural network side of things. So you can skip this example and come back to it later. But we figured that if you already knew, it could be helpful. -where ``mlp`` is a function that stacks multiple ``dense`` layers on top of each other with the given sizes and activation. Stochastic Policies ^^^^^^^^^^^^^^^^^^^ @@ -150,7 +164,7 @@ In what follows, we'll describe how to do these for both categorical and diagona A categorical policy is like a classifier over discrete actions. You build the neural network for a categorical policy the same way you would for a classifier: the input is the observation, followed by some number of layers (possibly convolutional or densely-connected, depending on the kind of input), and then you have one final linear layer that gives you logits for each action, followed by a `softmax`_ to convert the logits into probabilities. - **Sampling.** Given the probabilities for each action, frameworks like Tensorflow have built-in tools for sampling. For example, see the `tf.distributions.Categorical`_ documentation, or `tf.multinomial`_. + **Sampling.** Given the probabilities for each action, frameworks like PyTorch and Tensorflow have built-in tools for sampling. For example, see the documentation for `Categorical distributions in PyTorch`_, `torch.multinomial`_, `tf.distributions.Categorical`_, or `tf.multinomial`_. **Log-Likelihood.** Denote the last layer of probabilities as :math:`P_{\theta}(s)`. It is a vector with however many entries as there are actions, so we can treat the actions as indices for the vector. 
The log likelihood for an action :math:`a` can then be obtained by indexing into the vector: @@ -177,7 +191,7 @@ In what follows, we'll describe how to do these for both categorical and diagona a = \mu_{\theta}(s) + \sigma_{\theta}(s) \odot z, - where :math:`\odot` denotes the elementwise product of two vectors. Standard frameworks have built-in ways to compute the noise vectors, such as `tf.random_normal`_. Alternatively, you can just provide the mean and standard deviation directly to a `tf.distributions.Normal`_ object and use that to sample. + where :math:`\odot` denotes the elementwise product of two vectors. Standard frameworks have built-in ways to generate the noise vectors, such as `torch.normal`_ or `tf.random_normal`_. Alternatively, you can build distribution objects, eg through `torch.distributions.Normal`_ or `tf.distributions.Normal`_, and use them to generate samples. (The advantage of the latter approach is that those objects can also calculate log-likelihoods for you.) **Log-Likelihood.** The log-likelihood of a :math:`k` -dimensional action :math:`a`, for a diagonal Gaussian with mean :math:`\mu = \mu_{\theta}(s)` and standard deviation :math:`\sigma = \sigma_{\theta}(s)`, is given by @@ -190,10 +204,14 @@ In what follows, we'll describe how to do these for both categorical and diagona .. _`Categorical`: https://en.wikipedia.org/wiki/Categorical_distribution .. _`Gaussian`: https://en.wikipedia.org/wiki/Multivariate_normal_distribution .. _`softmax`: https://developers.google.com/machine-learning/crash-course/multi-class-neural-networks/softmax -.. _`tf.distributions.Categorical`: https://www.tensorflow.org/api_docs/python/tf/distributions/Categorical -.. _`tf.multinomial`: https://www.tensorflow.org/api_docs/python/tf/multinomial -.. _`tf.random_normal`: https://www.tensorflow.org/api_docs/python/tf/random_normal -.. _`tf.distributions.Normal`: https://www.tensorflow.org/api_docs/python/tf/distributions/Normal +.. _`Categorical distributions in PyTorch`: https://pytorch.org/docs/stable/distributions.html#categorical +.. _`torch.multinomial`: https://pytorch.org/docs/stable/torch.html#torch.multinomial +.. _`tf.distributions.Categorical`: https://www.tensorflow.org/versions/r1.15/api_docs/python/tf/distributions/Categorical +.. _`tf.multinomial`: https://www.tensorflow.org/versions/r1.15/api_docs/python/tf/random/multinomial +.. _`torch.normal`: https://pytorch.org/docs/stable/torch.html#torch.normal +.. _`tf.random_normal`: https://www.tensorflow.org/versions/r1.15/api_docs/python/tf/random/normal +.. _`torch.distributions.Normal`: https://pytorch.org/docs/stable/distributions.html#normal +.. _`tf.distributions.Normal`: https://www.tensorflow.org/versions/r1.15/api_docs/python/tf/distributions/Normal Trajectories ------------ diff --git a/docs/spinningup/rl_intro3.rst b/docs/spinningup/rl_intro3.rst index 0c96e47ab..34e4d5d57 100644 --- a/docs/spinningup/rl_intro3.rst +++ b/docs/spinningup/rl_intro3.rst @@ -100,37 +100,58 @@ This last expression is the simplest version of the computable expression we des Implementing the Simplest Policy Gradient ========================================= -We give a short Tensorflow implementation of this simple version of the policy gradient algorithm in ``spinup/examples/pg_math/1_simple_pg.py``. (It can also be viewed `on github `_.) It is only 122 lines long, so we highly recommend reading through it in depth. While we won't go through the entirety of the code here, we'll highlight and explain a few important pieces. 
+We give a short PyTorch implementation of this simple version of the policy gradient algorithm in ``spinup/examples/pytorch/pg_math/1_simple_pg.py``. (It can also be viewed `on github `_.) It is only 128 lines long, so we highly recommend reading through it in depth. While we won't go through the entirety of the code here, we'll highlight and explain a few important pieces. + + +.. admonition:: You Should Know + + This section was previously written with a Tensorflow example. The old Tensorflow section can be found `here <../spinningup/extra_tf_pg_implementation.html#implementing-the-simplest-policy-gradient>`_. **1. Making the Policy Network.** .. code-block:: python :linenos: - :lineno-start: 25 + :lineno-start: 30 # make core of policy network - obs_ph = tf.placeholder(shape=(None, obs_dim), dtype=tf.float32) - logits = mlp(obs_ph, sizes=hidden_sizes+[n_acts]) + logits_net = mlp(sizes=[obs_dim]+hidden_sizes+[n_acts]) + + # make function to compute action distribution + def get_policy(obs): + logits = logits_net(obs) + return Categorical(logits=logits) - # make action selection op (outputs int actions, sampled from policy) - actions = tf.squeeze(tf.multinomial(logits=logits,num_samples=1), axis=1) + # make action selection function (outputs int actions, sampled from policy) + def get_action(obs): + return get_policy(obs).sample().item() + +This block builds modules and functions for using a feedforward neural network categorical policy. (See the `Stochastic Policies`_ section in Part 1 for a refresher.) The output from the ``logits_net`` module can be used to construct log-probabilities and probabilities for actions, and the ``get_action`` function samples actions based on probabilities computed from the logits. (Note: this particular ``get_action`` function assumes that there will only be one ``obs`` provided, and therefore only one integer action output. That's why it uses ``.item()``, which is used to `get the contents of a Tensor with only one element`_.) + +A lot of work in this example is getting done by the ``Categorical`` object on L36. This is a PyTorch ``Distribution`` object that wraps up some mathematical functions associated with probability distributions. In particular, it has a method for sampling from the distribution (which we use on L40) and a method for computing log probabilities of given samples (which we use later). Since PyTorch distributions are really useful for RL, check out `their documentation`_ to get a feel for how they work. + +.. admonition:: You Should Know + + Friendly reminder! When we talk about a categorical distribution having "logits," what we mean is that the probabilities for each outcome are given by the Softmax function of the logits. That is, the probability for action :math:`j` under a categorical distribution with logits :math:`x_j` is: + + .. math:: + + p_j = \frac{\exp(x_j)}{\sum_{i} \exp(x_i)} -This block builds a feedforward neural network categorical policy. (See the `Stochastic Policies`_ section in Part 1 for a refresher.) The ``logits`` tensor can be used to construct log-probabilities and probabilities for actions, and the ``actions`` tensor samples actions based on the probabilities implied by ``logits``. .. _`Stochastic Policies`: ../spinningup/rl_intro.html#stochastic-policies +.. _`their documentation`: https://pytorch.org/docs/stable/distributions.html +.. _`get the contents of a Tensor with only one element`: https://pytorch.org/docs/stable/tensors.html#torch.Tensor.item **2. Making the Loss Function.** .. 
code-block:: python :linenos: - :lineno-start: 32 + :lineno-start: 42 # make loss function whose gradient, for the right data, is policy gradient - weights_ph = tf.placeholder(shape=(None,), dtype=tf.float32) - act_ph = tf.placeholder(shape=(None,), dtype=tf.int32) - action_masks = tf.one_hot(act_ph, n_acts) - log_probs = tf.reduce_sum(action_masks * tf.nn.log_softmax(logits), axis=1) - loss = -tf.reduce_mean(weights_ph * log_probs) + def compute_loss(obs, act, weights): + logp = get_policy(obs).log_prob(act) + return -(logp * weights).mean() In this block, we build a "loss" function for the policy gradient algorithm. When the right data is plugged in, the gradient of this loss is equal to the policy gradient. The right data means a set of (state, action, weight) tuples collected while acting according to the current policy, where the weight for a state-action pair is the return from the episode to which it belongs. (Although as we will show in later subsections, there are other values you can plug in for the weight which also work correctly.) @@ -153,87 +174,100 @@ In this block, we build a "loss" function for the policy gradient algorithm. Whe .. admonition:: You Should Know - The approach used here to make the ``log_probs`` tensor---creating an action mask, and using it to select out particular log probabilities---*only* works for categorical policies. It does not work in general. - - - -**3. Running One Epoch of Training.** - -.. code-block:: python - :linenos: - :lineno-start: 45 - - # for training policy - def train_one_epoch(): - # make some empty lists for logging. - batch_obs = [] # for observations - batch_acts = [] # for actions - batch_weights = [] # for R(tau) weighting in policy gradient - batch_rets = [] # for measuring episode returns - batch_lens = [] # for measuring episode lengths - - # reset episode-specific variables - obs = env.reset() # first obs comes from starting distribution - done = False # signal from environment that episode is over - ep_rews = [] # list for rewards accrued throughout ep - - # render first episode of each epoch - finished_rendering_this_epoch = False - - # collect experience by acting in the environment with current policy - while True: + The approach used here to make the ``logp`` tensor--calling the ``log_prob`` method of a PyTorch ``Categorical`` object--may require some modification to work with other kinds of distribution objects. - # rendering - if not(finished_rendering_this_epoch): - env.render() + For example, if you are using a `Normal distribution`_ (for a diagonal Gaussian policy), the output from calling ``policy.log_prob(act)`` will give you a Tensor containing separate log probabilities for each component of each vector-valued action. That is to say, you put in a Tensor of shape ``(batch, act_dim)``, and get out a Tensor of shape ``(batch, act_dim)``, when what you need for making an RL loss is a Tensor of shape ``(batch,)``. In that case, you would sum up the log probabilities of the action components to get the log probabilities of the actions. That is, you would compute: - # save obs - batch_obs.append(obs.copy()) + .. 
code-block:: python - # act in the environment - act = sess.run(actions, {obs_ph: obs.reshape(1,-1)})[0] - obs, rew, done, _ = env.step(act) + logp = get_policy(obs).log_prob(act).sum(axis=-1) - # save action, reward - batch_acts.append(act) - ep_rews.append(rew) - if done: - # if episode is over, record info about episode - ep_ret, ep_len = sum(ep_rews), len(ep_rews) - batch_rets.append(ep_ret) - batch_lens.append(ep_len) +.. _`Normal distribution`: https://pytorch.org/docs/stable/distributions.html#normal - # the weight for each logprob(a|s) is R(tau) - batch_weights += [ep_ret] * ep_len - - # reset episode-specific variables - obs, done, ep_rews = env.reset(), False, [] - # won't render again this epoch - finished_rendering_this_epoch = True - - # end experience loop if we have enough of it - if len(batch_obs) > batch_size: - break +**3. Running One Epoch of Training.** - # take a single policy gradient update step - batch_loss, _ = sess.run([loss, train_op], - feed_dict={ - obs_ph: np.array(batch_obs), - act_ph: np.array(batch_acts), - weights_ph: np.array(batch_weights) - }) - return batch_loss, batch_rets, batch_lens +.. code-block:: python + :linenos: + :lineno-start: 50 + + # for training policy + def train_one_epoch(): + # make some empty lists for logging. + batch_obs = [] # for observations + batch_acts = [] # for actions + batch_weights = [] # for R(tau) weighting in policy gradient + batch_rets = [] # for measuring episode returns + batch_lens = [] # for measuring episode lengths + + # reset episode-specific variables + obs = env.reset() # first obs comes from starting distribution + done = False # signal from environment that episode is over + ep_rews = [] # list for rewards accrued throughout ep + + # render first episode of each epoch + finished_rendering_this_epoch = False + + # collect experience by acting in the environment with current policy + while True: + + # rendering + if (not finished_rendering_this_epoch) and render: + env.render() + + # save obs + batch_obs.append(obs.copy()) + + # act in the environment + act = get_action(torch.as_tensor(obs, dtype=torch.float32)) + obs, rew, done, _ = env.step(act) + + # save action, reward + batch_acts.append(act) + ep_rews.append(rew) + + if done: + # if episode is over, record info about episode + ep_ret, ep_len = sum(ep_rews), len(ep_rews) + batch_rets.append(ep_ret) + batch_lens.append(ep_len) + + # the weight for each logprob(a|s) is R(tau) + batch_weights += [ep_ret] * ep_len + + # reset episode-specific variables + obs, done, ep_rews = env.reset(), False, [] + + # won't render again this epoch + finished_rendering_this_epoch = True + + # end experience loop if we have enough of it + if len(batch_obs) > batch_size: + break + + # take a single policy gradient update step + optimizer.zero_grad() + batch_loss = compute_loss(obs=torch.as_tensor(batch_obs, dtype=torch.float32), + act=torch.as_tensor(batch_acts, dtype=torch.int32), + weights=torch.as_tensor(batch_weights, dtype=torch.float32) + ) + batch_loss.backward() + optimizer.step() + return batch_loss, batch_rets, batch_lens The ``train_one_epoch()`` function runs one "epoch" of policy gradient, which we define to be -1) the experience collection step (L62-97), where the agent acts for some number of episodes in the environment using the most recent policy, followed by +1) the experience collection step (L67-102), where the agent acts for some number of episodes in the environment using the most recent policy, followed by -2) a single policy gradient update step 
(L99-105). +2) a single policy gradient update step (L104-111). The main loop of the algorithm just repeatedly calls ``train_one_epoch()``. +.. admonition:: You Should Know + + If you aren't already familiar with optimization in PyTorch, observe the pattern for taking one gradient descent step as shown in lines 104-111. First, clear the gradient buffers. Then, compute the loss function. Then, compute a backward pass on the loss function; this accumulates fresh gradients into the gradient buffers. Finally, take a step with the optimizer. + Expected Grad-Log-Prob Lemma ============================ @@ -311,13 +345,14 @@ An (optional) proof of this claim can be found `here`_, and it ultimately depend Implementing Reward-to-Go Policy Gradient ========================================= -We give a short Tensorflow implementation of the reward-to-go policy gradient in ``spinup/examples/pg_math/2_rtg_pg.py``. (It can also be viewed `on github `_.) + +We give a short PyTorch implementation of the reward-to-go policy gradient in ``spinup/examples/pytorch/pg_math/2_rtg_pg.py``. (It can also be viewed `on github `_.) The only thing that has changed from ``1_simple_pg.py`` is that we now use different weights in the loss function. The code modification is very slight: we add a new function, and change two other lines. The new function is: .. code-block:: python :linenos: - :lineno-start: 12 + :lineno-start: 17 def reward_to_go(rews): n = len(rews) @@ -327,11 +362,11 @@ The only thing that has changed from ``1_simple_pg.py`` is that we now use diffe return rtgs -And then we tweak the old L86-87 from: +And then we tweak the old L91-92 from: .. code-block:: python :linenos: - :lineno-start: 86 + :lineno-start: 91 # the weight for each logprob(a|s) is R(tau) batch_weights += [ep_ret] * ep_len @@ -340,12 +375,16 @@ to: .. code-block:: python :linenos: - :lineno-start: 93 + :lineno-start: 98 # the weight for each logprob(a_t|s_t) is reward-to-go from t batch_weights += list(reward_to_go(ep_rews)) +.. admonition:: You Should Know + + This section was previously written with a Tensorflow example. The old Tensorflow section can be found `here <../spinningup/extra_tf_pg_implementation.html#implementing-reward-to-go-policy-gradient>`_. + Baselines in Policy Gradients ============================= diff --git a/docs/user/algorithms.rst b/docs/user/algorithms.rst index 8778b675c..d41083e58 100644 --- a/docs/user/algorithms.rst +++ b/docs/user/algorithms.rst @@ -18,6 +18,8 @@ The following algorithms are implemented in the Spinning Up package: They are all implemented with `MLP`_ (non-recurrent) actor-critics, making them suitable for fully-observed, non-image-based RL environments, e.g. the `Gym Mujoco`_ environments. +Spinning Up has two implementations for each algorithm (except for TRPO): one that uses `PyTorch`_ as the neural network library, and one that uses `Tensorflow v1`_ as the neural network library. (TRPO is currently only available in Tensorflow.) + .. _`Gym Mujoco`: https://gym.openai.com/envs/#mujoco .. _`Vanilla Policy Gradient`: ../algorithms/vpg.html .. _`Trust Region Policy Optimization`: ../algorithms/trpo.html @@ -26,6 +28,8 @@ They are all implemented with `MLP`_ (non-recurrent) actor-critics, making them .. _`Twin Delayed DDPG`: ../algorithms/td3.html .. _`Soft Actor-Critic`: ../algorithms/sac.html .. _`MLP`: https://en.wikipedia.org/wiki/Multilayer_perceptron +.. _`PyTorch`: https://pytorch.org/ +.. _`Tensorflow v1`: https://www.tensorflow.org/versions/r1.15/api_docs Why These Algorithms? 
@@ -56,13 +60,48 @@ Code Format All implementations in Spinning Up adhere to a standard template. They are split into two files: an algorithm file, which contains the core logic of the algorithm, and a core file, which contains various utilities needed to run the algorithm. -The Algorithm File ------------------- +The algorithm file always starts with a class definition for an experience buffer object, which is used to store information from agent-environment interactions. Next, there is a single function which runs the algorithm. The algorithm function follows a template that is roughly the same across the PyTorch and Tensorflow versions, but we'll break it down for each separately below. Finally, there's some support in each algorithm file for directly running the algorithm in Gym environments from the command line (though this is not the recommended way to run the algorithms---we'll describe how to do that on the `Running Experiments`_ page). + +.. _`Running Experiments`: ../user/running.html -The algorithm file always starts with a class definition for an experience buffer object, which is used to store information from agent-environment interactions. +The Algorithm Function: PyTorch Version +--------------------------------------- + +The algorithm function for a PyTorch implementation performs the following tasks in (roughly) this order: + + 1) Logger setup + + 2) Random seed setting + + 3) Environment instantiation + + 4) Constructing the actor-critic PyTorch module via the ``actor_critic`` function passed to the algorithm function as an argument + + 5) Instantiating the experience buffer + + 6) Setting up callable loss functions that also provide diagnostics specific to the algorithm + + 7) Making PyTorch optimizers + + 8) Setting up model saving through the logger -Next, there is a single function which runs the algorithm, performing the following tasks (in this order): + 9) Setting up an update function that runs one epoch of optimization or one step of descent + + 10) Running the main loop of the algorithm: + + a) Run the agent in the environment + b) Periodically update the parameters of the agent according to the main equations of the algorithm + + c) Log key performance metrics and save agent + + + +The Algorithm Function: Tensorflow Version +------------------------------------------ + +The algorithm function for a Tensorflow implementation performs the following tasks in (roughly) this order: + 1) Logger setup 2) Random seed setting @@ -94,15 +133,13 @@ Next, there is a single function which runs the algorithm, performing the follow c) Log key performance metrics and save agent -Finally, there's some support for directly running the algorithm in Gym environments from the command line. - The Core File ------------- The core files don't adhere as closely as the algorithms files to a template, but do have some approximate structure: - 1) Functions related to making and managing placeholders + 1) **Tensorflow only:** Functions related to making and managing placeholders 2) Functions for building sections of computation graph relevant to the ``actor_critic`` method for a particular algorithm diff --git a/docs/user/introduction.rst b/docs/user/introduction.rst index 11769f576..b6c625c38 100644 --- a/docs/user/introduction.rst +++ b/docs/user/introduction.rst @@ -70,24 +70,35 @@ They are almost completely self-contained, with virtually no common code shared Importantly, they're all structured similarly, so if you clearly understand one, jumping into the next is painless. 
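+To make the shared structure a bit more tangible, here is a toy, hypothetical sketch of the "callable loss function plus update function" pattern that the PyTorch implementations follow (the module, data, and names below are illustrative stand-ins, not actual Spinning Up code):
+
+.. code-block:: python
+
+    import torch
+    import torch.nn as nn
+    from torch.optim import Adam
+
+    # Stand-ins for the actor-critic module and a batch sampled from a buffer.
+    net = nn.Linear(4, 1)
+    optimizer = Adam(net.parameters(), lr=1e-3)
+    data = dict(obs=torch.randn(8, 4), target=torch.randn(8, 1))
+
+    def compute_loss(data):
+        # Returns the loss along with diagnostics to hand to the logger.
+        loss = ((net(data['obs']) - data['target'])**2).mean()
+        return loss, dict(LossVal=loss.item())
+
+    def update(data):
+        optimizer.zero_grad()
+        loss, info = compute_loss(data)
+        loss.backward()
+        optimizer.step()
+        return info
+
+    print(update(data))
+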
-We tried to minimize the number of tricks used in each algorithm's implementation, and minimize the differences between otherwise-similar algorithms. To give some examples of removed tricks: we omit regularization_ terms present in the original Soft-Actor Critic code, as well as `observation normalization`_ from all algorithms. For an example of where we've removed differences between algorithms: our implementations of DDPG, TD3, and SAC all follow a convention laid out in the `original TD3 code`_, where all gradient descent updates are performed at the ends of episodes (instead of happening all throughout the episode). +We tried to minimize the number of tricks used in each algorithm's implementation, and minimize the differences between otherwise-similar algorithms. To give some examples of removed tricks: we omit regularization_ terms present in the original Soft-Actor Critic code, as well as `observation normalization`_ from all algorithms. For an example of where we've removed differences between algorithms: our implementations of DDPG, TD3, and SAC all follow a convention of running gradient descent updates after fixed intervals of environment interaction. (By contrast, other public implementations of these algorithms usually take slightly different approaches from each other, making them a little bit harder to compare.) All algorithms are "reasonably good" in the sense that they achieve roughly the intended performance, but don't necessarily match the best reported results in the literature on every task. Consequently, be careful if using any of these implementations for scientific benchmarking comparisons. Details on each implementation's specific performance level can be found on our `benchmarks`_ page. -Support Plan -============ +Long-Term Support and Support History +===================================== + +Spinning Up is currently in maintenance mode. If there are any breaking bugs, we'll repair them to ensure that Spinning Up can continue to help people study deep RL. + +Support history so far: + +- **Nov 8, 2018:** Initial release! + +- **Nov, 2018:** Release was followed by a three-week period of high-bandwidth support. + +- **April, 2019:** Approximately six months after release, we conducted an internal review of Spinning Up based on feedback from the community. The review surfaced interest in a few key features: + + * **Implementations in Other Neural Network Libraries.** Several people expressed interest in seeing Spinning Up use alternatives to Tensorflow v1 for the RL implementations. A few members of the community even developed their own PyTorch versions of Spinning Up algorithms, such as Kashif Rasul's `Fired Up`_, Kai Arulkumaran's `Spinning Up Basic`_, and Misha Laskin's `Torching Up`_. As a result, making this kind of "Rosetta Stone" for deep RL became a high priority for future work. -We plan to support Spinning Up to ensure that it serves as a helpful resource for learning about deep reinforcement learning. The exact nature of long-term (multi-year) support for Spinning Up is yet to be determined, but in the short run, we commit to: + * **Open Source RL Environments.** Many people expressed an interest in seeing Spinning Up use more open source RL environments (eg `PyBullet`_) for benchmarks, examples, and exercises. -- High-bandwidth support for the first three weeks after release (Nov 8, 2018 to Nov 29, 2018). + * **More Algorithms.** There was some interest in seeing other algorithms included in Spinning Up, especially Deep Q-Networks. 
- + We'll move quickly on bug-fixes, question-answering, and modifications to the docs to clear up ambiguities. - + We'll work hard to streamline the user experience, in order to make it as easy as possible to self-study with Spinning Up. +- **Jan, 2020:** The PyTorch update to Spinning Up was released! -- Approximately six months after release (in April 2019), we'll do a serious review of the state of the package based on feedback we receive from the community, and announce any plans for future modification, including a long-term roadmap. +- **Future:** No major updates are currently planned for Spinning Up. In the event it makes sense for us to release an additional update, following what we found in the 6-month review, the next-highest priority features are to focus more on open source RL environments and adding algorithms. -Additionally, as discussed in the blog post, we are using Spinning Up in the curriculum for our upcoming cohorts of Scholars_ and Fellows_. Any changes and updates we make for their benefit will immediately become public as well. +Additionally, as discussed in the blog post, Spinning Up has been integrated into the curriculum for our Scholars_ and Fellows_ programs. .. _`introduction`: ../spinningup/rl_intro.html @@ -104,5 +115,10 @@ Additionally, as discussed in the blog post, we are using Spinning Up in the cur .. _`observation normalization`: https://github.com/openai/baselines/blob/28aca637d0f13f4415cc5ebb778144154cff3110/baselines/run.py#L131 .. _`original TD3 code`: https://github.com/sfujim/TD3/blob/25dfc0a6562c54ae5575fad5b8f08bc9d5c4e26c/main.py#L89 .. _`benchmarks`: ../spinningup/bench.html -.. _Scholars : https://jobs.lever.co/openai/cf6de4ed-4afd-4ace-9273-8842c003c842 -.. _Fellows : https://jobs.lever.co/openai/c9ba3f64-2419-4ff9-b81d-0526ae059f57 +.. _`Fired Up`: https://github.com/kashif/firedup +.. _`Spinning Up Basic`: https://github.com/Kaixhin/spinning-up-basic +.. _`Torching Up`: https://github.com/MishaLaskin/torchingup +.. _`PyBullet`: https://pybullet.org/wordpress/ +.. _`MuJoCo`: http://mujoco.org/ +.. _Scholars : https://openai.com/blog/openai-scholars-spring-2020/ +.. _Fellows : https://openai.com/blog/openai-fellows-fall-2018/ \ No newline at end of file diff --git a/docs/user/running.rst b/docs/user/running.rst index 23df1cd7d..04ecc9f47 100644 --- a/docs/user/running.rst +++ b/docs/user/running.rst @@ -38,11 +38,13 @@ eg: .. parsed-literal:: python -m spinup.run ppo --exp_name ppo_ant --env Ant-v2 --clip_ratio 0.1 0.2 - --hid[h] [32,32] [64,32] --act tf.nn.tanh --seed 0 10 20 --dt + --hid[h] [32,32] [64,32] --act torch.nn.Tanh --seed 0 10 20 --dt --data_dir path/to/data runs PPO in the ``Ant-v2`` Gym environment, with various settings controlled by the flags. + By default, the PyTorch version will run (except for with TRPO, since Spinning Up doesn't have a PyTorch TRPO yet). Substitute ``ppo`` with ``ppo_tf1`` for the Tensorflow version. + ``clip_ratio``, ``hid``, and ``act`` are flags to set some algorithm hyperparameters. You can provide multiple values for hyperparameters to run multiple experiments. Check the docs to see what hyperparameters you can set (click here for the `PPO documentation`_). ``hid`` and ``act`` are `special shortcut flags`_ for setting the hidden sizes and activation function for the neural networks trained by the algorithm. @@ -63,6 +65,23 @@ eg: .. _`special shortcut flags`: ../user/running.html#shortcut-flags .. 
_`Save directory names`: ../user/running.html#where-results-are-saved +Choosing PyTorch or Tensorflow from the Command Line +---------------------------------------------------- + +To use a PyTorch version of an algorithm, run with + +.. parsed-literal:: + + python -m spinup.run [algo]_pytorch + +To use a Tensorflow version of an algorithm, run with + +.. parsed-literal:: + + python -m spinup.run [algo]_tf1 + +If you run ``python -m spinup.run [algo]`` without ``_pytorch`` or ``_tf1``, the runner will look in ``spinup/user_config.py`` for which version it should default to for that algorithm. + Setting Hyperparameters from the Command Line --------------------------------------------- @@ -80,9 +99,9 @@ to see a readout of the docstring. .. parsed-literal:: - python -m spinup.run ppo --env Walker2d-v2 --exp_name walker --act tf.nn.elu + python -m spinup.run ppo --env Walker2d-v2 --exp_name walker --act torch.nn.ELU - sets ``tf.nn.elu`` as the activation function. + sets ``torch.nn.ELU`` as the activation function. (Tensorflow equivalent: run ``ppo_tf1`` with ``--act tf.nn.elu``.) .. admonition:: You Should Know @@ -193,7 +212,7 @@ For example, consider: .. parsed-literal:: - python -m spinup.run ddpg --env Hopper-v2 --hid[h] [300] [128,128] --act tf.nn.tanh tf.nn.relu + python -m spinup.run ddpg_tf1 --env Hopper-v2 --hid[h] [300] [128,128] --act tf.nn.tanh tf.nn.relu Here, the ``--hid`` flag is given a **user-supplied shorthand**, ``h``. The ``--act`` flag is not given a shorthand by the user, so one will be constructed for it automatically. @@ -213,7 +232,7 @@ Extra .. admonition:: You Don't Actually Need to Know This One - Each individual algorithm is located in a file ``spinup/algos/ALGO_NAME/ALGO_NAME.py``, and these files can be run directly from the command line with a limited set of arguments (some of which differ from what's available to ``spinup/run.py``). The command line support in the individual algorithm files is essentially vestigial, however, and this is **not** a recommended way to perform experiments. + Each individual algorithm is located in a file ``spinup/algos/BACKEND/ALGO_NAME/ALGO_NAME.py``, and these files can be run directly from the command line with a limited set of arguments (some of which differ from what's available to ``spinup/run.py``). The command line support in the individual algorithm files is essentially vestigial, however, and this is **not** a recommended way to perform experiments. This documentation page will not describe those command line calls, and will *only* describe calls through ``spinup/run.py``. @@ -222,13 +241,13 @@ Launching from Scripts Each algorithm is implemented as a python function, which can be imported directly from the ``spinup`` package, eg ->>> from spinup import ppo +>>> from spinup import ppo_pytorch as ppo See the documentation page for each algorithm for a complete account of possible arguments. These methods can be used to set up specialized custom experiments, for example: .. code-block:: python - from spinup import ppo + from spinup import ppo_tf1 as ppo import tensorflow as tf import gym @@ -247,14 +266,14 @@ Using ExperimentGrid It's often useful in machine learning research to run the same algorithm with many possible hyperparameters. Spinning Up ships with a simple tool for facilitating this, called `ExperimentGrid`_. -Consider the example in ``spinup/examples/bench_ppo_cartpole.py``: +Consider the example in ``spinup/examples/pytorch/bench_ppo_cartpole.py``: .. 
code-block:: python :linenos: from spinup.utils.run_utils import ExperimentGrid - from spinup import ppo - import tensorflow as tf + from spinup import ppo_pytorch + import torch if __name__ == '__main__': import argparse @@ -263,14 +282,16 @@ Consider the example in ``spinup/examples/bench_ppo_cartpole.py``: parser.add_argument('--num_runs', type=int, default=3) args = parser.parse_args() - eg = ExperimentGrid(name='ppo-bench') + eg = ExperimentGrid(name='ppo-pyt-bench') eg.add('env_name', 'CartPole-v0', '', True) eg.add('seed', [10*i for i in range(args.num_runs)]) eg.add('epochs', 10) eg.add('steps_per_epoch', 4000) eg.add('ac_kwargs:hidden_sizes', [(32,), (64,64)], 'hid') - eg.add('ac_kwargs:activation', [tf.tanh, tf.nn.relu], '') - eg.run(ppo, num_cpu=args.cpu) + eg.add('ac_kwargs:activation', [torch.nn.Tanh, torch.nn.ReLU], '') + eg.run(ppo_pytorch, num_cpu=args.cpu) + +(An equivalent Tensorflow example is available in ``spinup/examples/tf1/bench_ppo_cartpole.py``.) After making the ExperimentGrid object, parameters are added to it with diff --git a/docs/user/saving_and_loading.rst b/docs/user/saving_and_loading.rst index 3397a2a21..89414afa6 100644 --- a/docs/user/saving_and_loading.rst +++ b/docs/user/saving_and_loading.rst @@ -23,8 +23,13 @@ Each algorithm is set up to save a training run's hyperparameter configuration, +--------------------------------------------------------------------------------+ | **Output Directory Structure** | +----------------+---------------------------------------------------------------+ -|``simple_save/``| | A directory containing everything needed to restore the | -| | | trained agent and value functions. (`Details below.`_) | +|``pyt_save/`` | | **PyTorch implementations only.** A directory containing | +| | | everything needed to restore the trained agent and value | +| | | functions. (`Details for PyTorch saves below.`_) | ++----------------+---------------------------------------------------------------+ +|``tf1_save/`` | | **Tensorflow implementations only.** A directory containing | +| | | everything needed to restore the trained agent and value | +| | | functions. (`Details for Tensorflow saves below.`_) | +----------------+---------------------------------------------------------------+ |``config.json`` | | A dict containing an as-complete-as-possible description | | | | of the args and kwargs you used to launch the training | @@ -47,12 +52,40 @@ Each algorithm is set up to save a training run's hyperparameter configuration, Sometimes environment-saving fails because the environment can't be pickled, and ``vars.pkl`` is empty. This is known to be a problem for Gym Box2D environments in older versions of Gym, which can't be saved in this manner. -.. _`Details below.`: +.. admonition:: You Should Know + + As of 1/30/20, the save directory structure has changed slightly. Previously, Tensorflow graphs were saved in the ``simple_save/`` folder; this has been replaced with ``tf1_save/``. + +.. admonition:: You Should Know + + The only file in here that you should ever have to use "by hand" is the ``config.json`` file. Our agent testing utility will load things from the ``tf1_save/`` or ``pyt_save/`` directory, and our plotter interprets the contents of ``progress.txt``, and those are the correct tools for interfacing with these outputs. But there is no tooling for ``config.json``---it's just there so that if you forget what hyperparameters you ran an experiment with, you can double-check. 
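+For example, a minimal sketch of double-checking a run's hyperparameters (the output directory path here is hypothetical---substitute one of your own):
+
+.. code-block:: python
+
+    import json
+
+    # Point this at one of your own experiment output directories.
+    with open('path/to/output_directory/config.json') as f:
+        config = json.load(f)
+
+    # Pretty-print the stored args and kwargs for the run.
+    print(json.dumps(config, indent=2, sort_keys=True))
+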
+ + + +PyTorch Save Directory Info +--------------------------- +.. _`Details for PyTorch saves below.`: + +The ``pyt_save`` directory contains: + ++----------------------------------------------------------------------------------+ +| **Pyt_Save Directory Structure** | ++------------------+---------------------------------------------------------------+ +|``model.pt`` | | A file created with ``torch.save``, essentially just a | +| | | pickled PyTorch ``nn.Module``. Loading it will restore | +| | | a trained agent as an ActorCritic object with an ``act`` | +| | | method. | ++------------------+---------------------------------------------------------------+ + + +Tensorflow Save Directory Info +------------------------------ +.. _`Details for Tensorflow saves below.`: -The ``simple_save`` directory contains: +The ``tf1_save`` directory contains: +----------------------------------------------------------------------------------+ -| **Simple_Save Directory Structure** | +| **TF1_Save Directory Structure** | +------------------+---------------------------------------------------------------+ |``variables/`` | | A directory containing outputs from the Tensorflow Saver. | | | | See documentation for `Tensorflow SavedModel`_. | @@ -63,10 +96,6 @@ The ``simple_save`` directory contains: |``saved_model.pb``| | A protocol buffer, needed for a `Tensorflow SavedModel`_. | +------------------+---------------------------------------------------------------+ -.. admonition:: You Should Know - - The only file in here that you should ever have to use "by hand" is the ``config.json`` file. Our agent testing utility will load things from the ``simple_save/`` directory and ``vars.pkl`` file, and our plotter interprets the contents of ``progress.txt``, and those are the correct tools for interfacing with these outputs. But there is no tooling for ``config.json``---it's just there so that if you forget what hyperparameters you ran an experiment with, you can double-check. - .. _`Tensorflow SavedModel`: https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/README.md @@ -127,21 +156,19 @@ There are a few flags for options: The default value of this flag means "use the latest snapshot." - To modify an algo so it does produce multiple snapshots, find the following lines (which are present in all of the algorithms): + To modify an algo so it does produce multiple snapshots, find the following line (which is present in all of the algorithms): .. code-block:: python - if (epoch % save_freq == 0) or (epoch == epochs-1): - logger.save_state({'env': env}, None) + logger.save_state({'env': env}, None) - and tweak them to + and tweak it to .. code-block:: python - if (epoch % save_freq == 0) or (epoch == epochs-1): - logger.save_state({'env': env}, epoch) + logger.save_state({'env': env}, epoch) - Make sure to then also set ``save_freq`` to something reasonable (because if it defaults to 1, for instance, you'll flood your output directory with one ``simple_save`` folder for each snapshot---which adds up fast). + Make sure to then also set ``save_freq`` to something reasonable (because if it defaults to 1, for instance, you'll flood your output directory with one ``save`` folder for each snapshot---which adds up fast). .. 
option:: -d, --deterministic @@ -153,15 +180,15 @@ There are a few flags for options: Environment Not Found Error --------------------------- -If the environment wasn't saved successfully, you can expect ``test_policy.py`` to crash with +If the environment wasn't saved successfully, you can expect ``test_policy.py`` to crash with something that looks like .. parsed-literal:: Traceback (most recent call last): - File "spinup/utils/test_policy.py", line 88, in + File "spinup/utils/test_policy.py", line 153, in run_policy(env, get_action, args.len, args.episodes, not(args.norender)) - File "spinup/utils/test_policy.py", line 50, in run_policy - "page on Experiment Outputs for how to handle this situation." + File "spinup/utils/test_policy.py", line 114, in run_policy + "and we can't run the agent in it. :( \n\n Check out the readthedocs " + AssertionError: Environment not found! It looks like the environment wasn't saved, and we can't run the agent in it. :( @@ -171,9 +198,9 @@ If the environment wasn't saved successfully, you can expect ``test_policy.py`` In this case, watching your agent perform is slightly more of a pain but not impossible, as long as you can recreate your environment easily. Try the following in IPython: ->>> from spinup.utils.test_policy import load_policy, run_policy +>>> from spinup.utils.test_policy import load_policy_and_env, run_policy >>> import your_env ->>> _, get_action = load_policy('/path/to/output_directory') +>>> _, get_action = load_policy_and_env('/path/to/output_directory') >>> env = your_env.make() >>> run_policy(env, get_action) Logging data to /tmp/experiments/1536150702/progress.txt @@ -185,6 +212,6 @@ Episode 1 EpRet -346.164 EpLen 99 Using Trained Value Functions ----------------------------- -The ``test_policy.py`` tool doesn't help you look at trained value functions, and if you want to use those, you will have to do some digging by hand. Check the documentation for the `restore_tf_graph`_ function for details on how. +The ``test_policy.py`` tool doesn't help you look at trained value functions, and if you want to use those, you will have to do some digging by hand. For the PyTorch case, load the saved model file with ``torch.load`` and check the documentation for each algorithm to see what modules the ActorCritic object has. For the Tensorflow case, load the saved computation graph with the `restore_tf_graph`_ function, and check the documentation for each algorithm to see what functions were saved. .. _`restore_tf_graph`: ../utils/logger.html#spinup.utils.logx.restore_tf_graph \ No newline at end of file diff --git a/docs/utils/logger.rst b/docs/utils/logger.rst index 2c338bc4f..a4f545ed9 100644 --- a/docs/utils/logger.rst +++ b/docs/utils/logger.rst @@ -128,6 +128,15 @@ In this example, observe that .. _`logger.dump_tabular`: ../utils/logger.html#spinup.utils.logx.Logger.dump_tabular +Logging and PyTorch +------------------- + +The preceding example was given in Tensorflow. For PyTorch, everything is the same except for L42-43: instead of ``logger.setup_tf_saver``, you would use ``logger.setup_pytorch_saver``, and you would pass it `a PyTorch module`_ (the network you are training) as an argument. + +The behavior of ``logger.save_state`` is the same as in the Tensorflow case: each time it is called, it'll save the latest version of the PyTorch module. + +.. 
_`a PyTorch module`: https://pytorch.org/docs/stable/nn.html#torch.nn.Module + Logging and MPI --------------- @@ -150,13 +159,28 @@ Logger Classes :members: +Loading Saved Models (PyTorch Only) +=================================== + +To load an actor-critic model saved by a PyTorch Spinning Up implementation, run: + +.. code-block:: python + + ac = torch.load('path/to/model.pt') + +When you use this method to load an actor-critic model, you can minimally expect it to have an ``act`` method that allows you to sample actions from the policy, given observations: + +.. code-block:: python + + actions = ac.act(torch.as_tensor(obs, dtype=torch.float32)) + -Loading Saved Graphs -==================== +Loading Saved Graphs (Tensorflow Only) +====================================== .. autofunction:: spinup.utils.logx.restore_tf_graph -When you use this method to restore a graph saved by a Spinning Up implementation, you can minimally expect it to include the following: +When you use this method to restore a graph saved by a Tensorflow Spinning Up implementation, you can minimally expect it to include the following: ====== =============================================== Key Value diff --git a/docs/utils/mpi.rst b/docs/utils/mpi.rst index 4a1a5b424..054c4de69 100644 --- a/docs/utils/mpi.rst +++ b/docs/utils/mpi.rst @@ -11,6 +11,31 @@ Core MPI Utilities :members: +MPI + PyTorch Utilities +======================= + +``spinup.utils.mpi_pytorch`` contains a few tools to make it easy to do data-parallel PyTorch optimization across MPI processes. The two main ingredients are syncing parameters and averaging gradients before they are used by the adaptive optimizer. Also there's a hacky fix for a problem where the PyTorch instance in each separate process tries to get too many threads, and they start to clobber each other. + +The pattern for using these tools looks something like this: + +1) At the beginning of the training script, call ``setup_pytorch_for_mpi()``. (Avoids clobbering problem.) + +2) After you've constructed a PyTorch module, call ``sync_params(module)``. + +3) Then, during gradient descent, call ``mpi_avg_grads`` after the backward pass, like so: + +.. code-block:: python + + optimizer.zero_grad() + loss = compute_loss(module) + loss.backward() + mpi_avg_grads(module) # averages gradient buffers across MPI processes! + optimizer.step() + + +.. automodule:: spinup.utils.mpi_pytorch + :members: + MPI + Tensorflow Utilities ========================== @@ -19,4 +44,4 @@ The ``spinup.utils.mpi_tf`` contains a a few tools to make it easy to use the Ad .. _`horovod`: https://github.com/uber/horovod .. automodule:: spinup.utils.mpi_tf - :members: \ No newline at end of file + :members: diff --git a/readme.md b/readme.md index 252eff11c..2ed816d64 100644 --- a/readme.md +++ b/readme.md @@ -15,4 +15,18 @@ This module contains a variety of helpful resources, including: - a well-documented [code repo](https://github.com/openai/spinningup) of short, standalone implementations of key algorithms, - and a few [exercises](https://spinningup.openai.com/en/latest/spinningup/exercises.html) to serve as warm-ups. -Get started at [spinningup.openai.com](https://spinningup.openai.com)! \ No newline at end of file +Get started at [spinningup.openai.com](https://spinningup.openai.com)! 
+ + +Citing Spinning Up +------------------ + +If you reference or use Spinning Up in your research, please cite: + +``` +@article{SpinningUp2018, + author = {Achiam, Joshua}, + title = {{Spinning Up in Deep Reinforcement Learning}}, + year = {2018} +} +``` \ No newline at end of file diff --git a/setup.py b/setup.py index 6a0881ce9..16116a784 100644 --- a/setup.py +++ b/setup.py @@ -15,7 +15,7 @@ version=__version__,#'0.1', install_requires=[ 'cloudpickle==1.2.1', - 'gym[atari,box2d,classic_control]>=0.10.8', + 'gym[atari,box2d,classic_control]~=0.15.3', 'ipython', 'joblib', 'matplotlib==3.1.1', @@ -27,9 +27,9 @@ 'scipy', 'seaborn==0.8.1', 'tensorflow>=1.8.0,<2.0', + 'torch==1.3.1', 'tqdm' ], - extras_require={'mujoco': 'mujoco-py==2.0.2.7'}, description="Teaching tools for introducing people to deep RL.", author="Joshua Achiam", ) diff --git a/spinup/__init__.py b/spinup/__init__.py index fe8258630..e5e7ead95 100644 --- a/spinup/__init__.py +++ b/spinup/__init__.py @@ -1,10 +1,22 @@ +# Disable TF deprecation warnings. +# Syntax from tf1 is not expected to be compatible with tf2. +import tensorflow as tf +tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) + # Algorithms -from spinup.algos.ddpg.ddpg import ddpg -from spinup.algos.ppo.ppo import ppo -from spinup.algos.sac.sac import sac -from spinup.algos.td3.td3 import td3 -from spinup.algos.trpo.trpo import trpo -from spinup.algos.vpg.vpg import vpg +from spinup.algos.tf1.ddpg.ddpg import ddpg as ddpg_tf1 +from spinup.algos.tf1.ppo.ppo import ppo as ppo_tf1 +from spinup.algos.tf1.sac.sac import sac as sac_tf1 +from spinup.algos.tf1.td3.td3 import td3 as td3_tf1 +from spinup.algos.tf1.trpo.trpo import trpo as trpo_tf1 +from spinup.algos.tf1.vpg.vpg import vpg as vpg_tf1 + +from spinup.algos.pytorch.ddpg.ddpg import ddpg as ddpg_pytorch +from spinup.algos.pytorch.ppo.ppo import ppo as ppo_pytorch +from spinup.algos.pytorch.sac.sac import sac as sac_pytorch +from spinup.algos.pytorch.td3.td3 import td3 as td3_pytorch +from spinup.algos.pytorch.trpo.trpo import trpo as trpo_pytorch +from spinup.algos.pytorch.vpg.vpg import vpg as vpg_pytorch # Loggers from spinup.utils.logx import Logger, EpochLogger diff --git a/spinup/algos/pytorch/ddpg/core.py b/spinup/algos/pytorch/ddpg/core.py new file mode 100644 index 000000000..e11cd4b03 --- /dev/null +++ b/spinup/algos/pytorch/ddpg/core.py @@ -0,0 +1,61 @@ +import numpy as np +import scipy.signal + +import torch +import torch.nn as nn + + +def combined_shape(length, shape=None): + if shape is None: + return (length,) + return (length, shape) if np.isscalar(shape) else (length, *shape) + +def mlp(sizes, activation, output_activation=nn.Identity): + layers = [] + for j in range(len(sizes)-1): + act = activation if j < len(sizes)-2 else output_activation + layers += [nn.Linear(sizes[j], sizes[j+1]), act()] + return nn.Sequential(*layers) + +def count_vars(module): + return sum([np.prod(p.shape) for p in module.parameters()]) + +class MLPActor(nn.Module): + + def __init__(self, obs_dim, act_dim, hidden_sizes, activation, act_limit): + super().__init__() + pi_sizes = [obs_dim] + list(hidden_sizes) + [act_dim] + self.pi = mlp(pi_sizes, activation, nn.Tanh) + self.act_limit = act_limit + + def forward(self, obs): + # Return output from network scaled to action space limits. 
+ return self.act_limit * self.pi(obs) + +class MLPQFunction(nn.Module): + + def __init__(self, obs_dim, act_dim, hidden_sizes, activation): + super().__init__() + self.q = mlp([obs_dim + act_dim] + list(hidden_sizes) + [1], activation) + + def forward(self, obs, act): + q = self.q(torch.cat([obs, act], dim=-1)) + return torch.squeeze(q, -1) # Critical to ensure q has right shape. + +class MLPActorCritic(nn.Module): + + def __init__(self, observation_space, action_space, hidden_sizes=(256,256), + activation=nn.ReLU): + super().__init__() + + obs_dim = observation_space.shape[0] + act_dim = action_space.shape[0] + act_limit = action_space.high[0] + + # build policy and value functions + self.pi = MLPActor(obs_dim, act_dim, hidden_sizes, activation, act_limit) + self.q = MLPQFunction(obs_dim, act_dim, hidden_sizes, activation) + + def act(self, obs): + with torch.no_grad(): + return self.pi(obs).numpy() diff --git a/spinup/algos/pytorch/ddpg/ddpg.py b/spinup/algos/pytorch/ddpg/ddpg.py new file mode 100644 index 000000000..1416a2123 --- /dev/null +++ b/spinup/algos/pytorch/ddpg/ddpg.py @@ -0,0 +1,327 @@ +from copy import deepcopy +import numpy as np +import torch +from torch.optim import Adam +import gym +import time +import spinup.algos.pytorch.ddpg.core as core +from spinup.utils.logx import EpochLogger + + +class ReplayBuffer: + """ + A simple FIFO experience replay buffer for DDPG agents. + """ + + def __init__(self, obs_dim, act_dim, size): + self.obs_buf = np.zeros(core.combined_shape(size, obs_dim), dtype=np.float32) + self.obs2_buf = np.zeros(core.combined_shape(size, obs_dim), dtype=np.float32) + self.act_buf = np.zeros(core.combined_shape(size, act_dim), dtype=np.float32) + self.rew_buf = np.zeros(size, dtype=np.float32) + self.done_buf = np.zeros(size, dtype=np.float32) + self.ptr, self.size, self.max_size = 0, 0, size + + def store(self, obs, act, rew, next_obs, done): + self.obs_buf[self.ptr] = obs + self.obs2_buf[self.ptr] = next_obs + self.act_buf[self.ptr] = act + self.rew_buf[self.ptr] = rew + self.done_buf[self.ptr] = done + self.ptr = (self.ptr+1) % self.max_size + self.size = min(self.size+1, self.max_size) + + def sample_batch(self, batch_size=32): + idxs = np.random.randint(0, self.size, size=batch_size) + batch = dict(obs=self.obs_buf[idxs], + obs2=self.obs2_buf[idxs], + act=self.act_buf[idxs], + rew=self.rew_buf[idxs], + done=self.done_buf[idxs]) + return {k: torch.as_tensor(v, dtype=torch.float32) for k,v in batch.items()} + + + +def ddpg(env_fn, actor_critic=core.MLPActorCritic, ac_kwargs=dict(), seed=0, + steps_per_epoch=4000, epochs=100, replay_size=int(1e6), gamma=0.99, + polyak=0.995, pi_lr=1e-3, q_lr=1e-3, batch_size=100, start_steps=10000, + update_after=1000, update_every=50, act_noise=0.1, num_test_episodes=10, + max_ep_len=1000, logger_kwargs=dict(), save_freq=1): + """ + Deep Deterministic Policy Gradient (DDPG) + + + Args: + env_fn : A function which creates a copy of the environment. + The environment must satisfy the OpenAI Gym API. + + actor_critic: The constructor method for a PyTorch Module with an ``act`` + method, a ``pi`` module, and a ``q`` module. The ``act`` method and + ``pi`` module should accept batches of observations as inputs, + and ``q`` should accept a batch of observations and a batch of + actions as inputs. 
When called, these should return: + + =========== ================ ====================================== + Call Output Shape Description + =========== ================ ====================================== + ``act`` (batch, act_dim) | Numpy array of actions for each + | observation. + ``pi`` (batch, act_dim) | Tensor containing actions from policy + | given observations. + ``q`` (batch,) | Tensor containing the current estimate + | of Q* for the provided observations + | and actions. (Critical: make sure to + | flatten this!) + =========== ================ ====================================== + + ac_kwargs (dict): Any kwargs appropriate for the ActorCritic object + you provided to DDPG. + + seed (int): Seed for random number generators. + + steps_per_epoch (int): Number of steps of interaction (state-action pairs) + for the agent and the environment in each epoch. + + epochs (int): Number of epochs to run and train agent. + + replay_size (int): Maximum length of replay buffer. + + gamma (float): Discount factor. (Always between 0 and 1.) + + polyak (float): Interpolation factor in polyak averaging for target + networks. Target networks are updated towards main networks + according to: + + .. math:: \\theta_{\\text{targ}} \\leftarrow + \\rho \\theta_{\\text{targ}} + (1-\\rho) \\theta + + where :math:`\\rho` is polyak. (Always between 0 and 1, usually + close to 1.) + + pi_lr (float): Learning rate for policy. + + q_lr (float): Learning rate for Q-networks. + + batch_size (int): Minibatch size for SGD. + + start_steps (int): Number of steps for uniform-random action selection, + before running real policy. Helps exploration. + + update_after (int): Number of env interactions to collect before + starting to do gradient descent updates. Ensures replay buffer + is full enough for useful updates. + + update_every (int): Number of env interactions that should elapse + between gradient descent updates. Note: Regardless of how long + you wait between updates, the ratio of env steps to gradient steps + is locked to 1. + + act_noise (float): Stddev for Gaussian exploration noise added to + policy at training time. (At test time, no noise is added.) + + num_test_episodes (int): Number of episodes to test the deterministic + policy at the end of each epoch. + + max_ep_len (int): Maximum length of trajectory / episode / rollout. + + logger_kwargs (dict): Keyword args for EpochLogger. + + save_freq (int): How often (in terms of gap between epochs) to save + the current policy and value function. + + """ + + logger = EpochLogger(**logger_kwargs) + logger.save_config(locals()) + + torch.manual_seed(seed) + np.random.seed(seed) + + env, test_env = env_fn(), env_fn() + obs_dim = env.observation_space.shape + act_dim = env.action_space.shape[0] + + # Action limit for clamping: critically, assumes all dimensions share the same bound! + act_limit = env.action_space.high[0] + + # Create actor-critic module and target networks + ac = actor_critic(env.observation_space, env.action_space, **ac_kwargs) + ac_targ = deepcopy(ac) + + # Freeze target networks with respect to optimizers (only update via polyak averaging) + for p in ac_targ.parameters(): + p.requires_grad = False + + # Experience buffer + replay_buffer = ReplayBuffer(obs_dim=obs_dim, act_dim=act_dim, size=replay_size) + + # Count variables (protip: try to get a feel for how different size networks behave!) 
+ var_counts = tuple(core.count_vars(module) for module in [ac.pi, ac.q]) + logger.log('\nNumber of parameters: \t pi: %d, \t q: %d\n'%var_counts) + + # Set up function for computing DDPG Q-loss + def compute_loss_q(data): + o, a, r, o2, d = data['obs'], data['act'], data['rew'], data['obs2'], data['done'] + + q = ac.q(o,a) + + # Bellman backup for Q function + with torch.no_grad(): + q_pi_targ = ac_targ.q(o2, ac_targ.pi(o2)) + backup = r + gamma * (1 - d) * q_pi_targ + + # MSE loss against Bellman backup + loss_q = ((q - backup)**2).mean() + + # Useful info for logging + loss_info = dict(QVals=q.detach().numpy()) + + return loss_q, loss_info + + # Set up function for computing DDPG pi loss + def compute_loss_pi(data): + o = data['obs'] + q_pi = ac.q(o, ac.pi(o)) + return -q_pi.mean() + + # Set up optimizers for policy and q-function + pi_optimizer = Adam(ac.pi.parameters(), lr=pi_lr) + q_optimizer = Adam(ac.q.parameters(), lr=q_lr) + + # Set up model saving + logger.setup_pytorch_saver(ac) + + def update(data): + # First run one gradient descent step for Q. + q_optimizer.zero_grad() + loss_q, loss_info = compute_loss_q(data) + loss_q.backward() + q_optimizer.step() + + # Freeze Q-network so you don't waste computational effort + # computing gradients for it during the policy learning step. + for p in ac.q.parameters(): + p.requires_grad = False + + # Next run one gradient descent step for pi. + pi_optimizer.zero_grad() + loss_pi = compute_loss_pi(data) + loss_pi.backward() + pi_optimizer.step() + + # Unfreeze Q-network so you can optimize it at next DDPG step. + for p in ac.q.parameters(): + p.requires_grad = True + + # Record things + logger.store(LossQ=loss_q.item(), LossPi=loss_pi.item(), **loss_info) + + # Finally, update target networks by polyak averaging. + with torch.no_grad(): + for p, p_targ in zip(ac.parameters(), ac_targ.parameters()): + # NB: We use an in-place operations "mul_", "add_" to update target + # params, as opposed to "mul" and "add", which would make new tensors. + p_targ.data.mul_(polyak) + p_targ.data.add_((1 - polyak) * p.data) + + def get_action(o, noise_scale): + a = ac.act(torch.as_tensor(o, dtype=torch.float32)) + a += noise_scale * np.random.randn(act_dim) + return np.clip(a, -act_limit, act_limit) + + def test_agent(): + for j in range(num_test_episodes): + o, d, ep_ret, ep_len = test_env.reset(), False, 0, 0 + while not(d or (ep_len == max_ep_len)): + # Take deterministic actions at test time (noise_scale=0) + o, r, d, _ = test_env.step(get_action(o, 0)) + ep_ret += r + ep_len += 1 + logger.store(TestEpRet=ep_ret, TestEpLen=ep_len) + + # Prepare for interaction with environment + total_steps = steps_per_epoch * epochs + start_time = time.time() + o, ep_ret, ep_len = env.reset(), 0, 0 + + # Main loop: collect experience in env and update/log each epoch + for t in range(total_steps): + + # Until start_steps have elapsed, randomly sample actions + # from a uniform distribution for better exploration. Afterwards, + # use the learned policy (with some noise, via act_noise). 
+ if t > start_steps: + a = get_action(o, act_noise) + else: + a = env.action_space.sample() + + # Step the env + o2, r, d, _ = env.step(a) + ep_ret += r + ep_len += 1 + + # Ignore the "done" signal if it comes from hitting the time + # horizon (that is, when it's an artificial terminal signal + # that isn't based on the agent's state) + d = False if ep_len==max_ep_len else d + + # Store experience to replay buffer + replay_buffer.store(o, a, r, o2, d) + + # Super critical, easy to overlook step: make sure to update + # most recent observation! + o = o2 + + # End of trajectory handling + if d or (ep_len == max_ep_len): + logger.store(EpRet=ep_ret, EpLen=ep_len) + o, ep_ret, ep_len = env.reset(), 0, 0 + + # Update handling + if t >= update_after and t % update_every == 0: + for _ in range(update_every): + batch = replay_buffer.sample_batch(batch_size) + update(data=batch) + + # End of epoch handling + if (t+1) % steps_per_epoch == 0: + epoch = (t+1) // steps_per_epoch + + # Save model + if (epoch % save_freq == 0) or (epoch == epochs): + logger.save_state({'env': env}, None) + + # Test the performance of the deterministic version of the agent. + test_agent() + + # Log info about epoch + logger.log_tabular('Epoch', epoch) + logger.log_tabular('EpRet', with_min_and_max=True) + logger.log_tabular('TestEpRet', with_min_and_max=True) + logger.log_tabular('EpLen', average_only=True) + logger.log_tabular('TestEpLen', average_only=True) + logger.log_tabular('TotalEnvInteracts', t) + logger.log_tabular('QVals', with_min_and_max=True) + logger.log_tabular('LossPi', average_only=True) + logger.log_tabular('LossQ', average_only=True) + logger.log_tabular('Time', time.time()-start_time) + logger.dump_tabular() + +if __name__ == '__main__': + import argparse + parser = argparse.ArgumentParser() + parser.add_argument('--env', type=str, default='HalfCheetah-v2') + parser.add_argument('--hid', type=int, default=256) + parser.add_argument('--l', type=int, default=2) + parser.add_argument('--gamma', type=float, default=0.99) + parser.add_argument('--seed', '-s', type=int, default=0) + parser.add_argument('--epochs', type=int, default=50) + parser.add_argument('--exp_name', type=str, default='ddpg') + args = parser.parse_args() + + from spinup.utils.run_utils import setup_logger_kwargs + logger_kwargs = setup_logger_kwargs(args.exp_name, args.seed) + + ddpg(lambda : gym.make(args.env), actor_critic=core.MLPActorCritic, + ac_kwargs=dict(hidden_sizes=[args.hid]*args.l), + gamma=args.gamma, seed=args.seed, epochs=args.epochs, + logger_kwargs=logger_kwargs) diff --git a/spinup/algos/pytorch/ppo/core.py b/spinup/algos/pytorch/ppo/core.py new file mode 100644 index 000000000..84e9f9889 --- /dev/null +++ b/spinup/algos/pytorch/ppo/core.py @@ -0,0 +1,135 @@ +import numpy as np +import scipy.signal +from gym.spaces import Box, Discrete + +import torch +import torch.nn as nn +from torch.distributions.normal import Normal +from torch.distributions.categorical import Categorical + + +def combined_shape(length, shape=None): + if shape is None: + return (length,) + return (length, shape) if np.isscalar(shape) else (length, *shape) + + +def mlp(sizes, activation, output_activation=nn.Identity): + layers = [] + for j in range(len(sizes)-1): + act = activation if j < len(sizes)-2 else output_activation + layers += [nn.Linear(sizes[j], sizes[j+1]), act()] + return nn.Sequential(*layers) + + +def count_vars(module): + return sum([np.prod(p.shape) for p in module.parameters()]) + + +def discount_cumsum(x, discount): + """ + magic 
from rllab for computing discounted cumulative sums of vectors. + + input: + vector x, + [x0, + x1, + x2] + + output: + [x0 + discount * x1 + discount^2 * x2, + x1 + discount * x2, + x2] + """ + return scipy.signal.lfilter([1], [1, float(-discount)], x[::-1], axis=0)[::-1] + + +class Actor(nn.Module): + + def _distribution(self, obs): + raise NotImplementedError + + def _log_prob_from_distribution(self, pi, act): + raise NotImplementedError + + def forward(self, obs, act=None): + # Produce action distributions for given observations, and + # optionally compute the log likelihood of given actions under + # those distributions. + pi = self._distribution(obs) + logp_a = None + if act is not None: + logp_a = self._log_prob_from_distribution(pi, act) + return pi, logp_a + + +class MLPCategoricalActor(Actor): + + def __init__(self, obs_dim, act_dim, hidden_sizes, activation): + super().__init__() + self.logits_net = mlp([obs_dim] + list(hidden_sizes) + [act_dim], activation) + + def _distribution(self, obs): + logits = self.logits_net(obs) + return Categorical(logits=logits) + + def _log_prob_from_distribution(self, pi, act): + return pi.log_prob(act) + + +class MLPGaussianActor(Actor): + + def __init__(self, obs_dim, act_dim, hidden_sizes, activation): + super().__init__() + log_std = -0.5 * np.ones(act_dim, dtype=np.float32) + self.log_std = torch.nn.Parameter(torch.as_tensor(log_std)) + self.mu_net = mlp([obs_dim] + list(hidden_sizes) + [act_dim], activation) + + def _distribution(self, obs): + mu = self.mu_net(obs) + std = torch.exp(self.log_std) + return Normal(mu, std) + + def _log_prob_from_distribution(self, pi, act): + return pi.log_prob(act).sum(axis=-1) # Last axis sum needed for Torch Normal distribution + + +class MLPCritic(nn.Module): + + def __init__(self, obs_dim, hidden_sizes, activation): + super().__init__() + self.v_net = mlp([obs_dim] + list(hidden_sizes) + [1], activation) + + def forward(self, obs): + return torch.squeeze(self.v_net(obs), -1) # Critical to ensure v has right shape. 
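
# Illustrative sketch (assumes only the imports and helpers above): a quick
# numerical check of discount_cumsum against its docstring. With
# x = [1, 1, 1] and discount = 0.5, the expected output is
#   [1 + 0.5*1 + 0.25*1,  1 + 0.5*1,  1] = [1.75, 1.5, 1.0].
if __name__ == '__main__':
    _x = np.array([1.0, 1.0, 1.0], dtype=np.float32)
    print(discount_cumsum(_x, 0.5))   # expect approximately [1.75 1.5 1.]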
+ + + +class MLPActorCritic(nn.Module): + + + def __init__(self, observation_space, action_space, + hidden_sizes=(64,64), activation=nn.Tanh): + super().__init__() + + obs_dim = observation_space.shape[0] + + # policy builder depends on action space + if isinstance(action_space, Box): + self.pi = MLPGaussianActor(obs_dim, action_space.shape[0], hidden_sizes, activation) + elif isinstance(action_space, Discrete): + self.pi = MLPCategoricalActor(obs_dim, action_space.n, hidden_sizes, activation) + + # build value function + self.v = MLPCritic(obs_dim, hidden_sizes, activation) + + def step(self, obs): + with torch.no_grad(): + pi = self.pi._distribution(obs) + a = pi.sample() + logp_a = self.pi._log_prob_from_distribution(pi, a) + v = self.v(obs) + return a.numpy(), v.numpy(), logp_a.numpy() + + def act(self, obs): + return self.step(obs)[0] \ No newline at end of file diff --git a/spinup/algos/pytorch/ppo/ppo.py b/spinup/algos/pytorch/ppo/ppo.py new file mode 100644 index 000000000..a77ed9daf --- /dev/null +++ b/spinup/algos/pytorch/ppo/ppo.py @@ -0,0 +1,378 @@ +import numpy as np +import torch +from torch.optim import Adam +import gym +import time +import spinup.algos.pytorch.ppo.core as core +from spinup.utils.logx import EpochLogger +from spinup.utils.mpi_pytorch import setup_pytorch_for_mpi, sync_params, mpi_avg_grads +from spinup.utils.mpi_tools import mpi_fork, mpi_avg, proc_id, mpi_statistics_scalar, num_procs + + +class PPOBuffer: + """ + A buffer for storing trajectories experienced by a PPO agent interacting + with the environment, and using Generalized Advantage Estimation (GAE-Lambda) + for calculating the advantages of state-action pairs. + """ + + def __init__(self, obs_dim, act_dim, size, gamma=0.99, lam=0.95): + self.obs_buf = np.zeros(core.combined_shape(size, obs_dim), dtype=np.float32) + self.act_buf = np.zeros(core.combined_shape(size, act_dim), dtype=np.float32) + self.adv_buf = np.zeros(size, dtype=np.float32) + self.rew_buf = np.zeros(size, dtype=np.float32) + self.ret_buf = np.zeros(size, dtype=np.float32) + self.val_buf = np.zeros(size, dtype=np.float32) + self.logp_buf = np.zeros(size, dtype=np.float32) + self.gamma, self.lam = gamma, lam + self.ptr, self.path_start_idx, self.max_size = 0, 0, size + + def store(self, obs, act, rew, val, logp): + """ + Append one timestep of agent-environment interaction to the buffer. + """ + assert self.ptr < self.max_size # buffer has to have room so you can store + self.obs_buf[self.ptr] = obs + self.act_buf[self.ptr] = act + self.rew_buf[self.ptr] = rew + self.val_buf[self.ptr] = val + self.logp_buf[self.ptr] = logp + self.ptr += 1 + + def finish_path(self, last_val=0): + """ + Call this at the end of a trajectory, or when one gets cut off + by an epoch ending. This looks back in the buffer to where the + trajectory started, and uses rewards and value estimates from + the whole trajectory to compute advantage estimates with GAE-Lambda, + as well as compute the rewards-to-go for each state, to use as + the targets for the value function. + + The "last_val" argument should be 0 if the trajectory ended + because the agent reached a terminal state (died), and otherwise + should be V(s_T), the value function estimated for the last state. + This allows us to bootstrap the reward-to-go calculation to account + for timesteps beyond the arbitrary episode horizon (or epoch cutoff). 
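        Concretely, the code below computes the TD residuals

        .. math:: \\delta_t = r_t + \\gamma V(s_{t+1}) - V(s_t)

        and forms the GAE-Lambda advantage estimates

        .. math:: \\hat{A}_t = \\sum_{l \\geq 0} (\\gamma \\lambda)^l \\delta_{t+l}

        (summed to the end of the path) via ``discount_cumsum(deltas, self.gamma * self.lam)``;
        the rewards-to-go computed afterwards serve as the value function targets.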
+ """ + + path_slice = slice(self.path_start_idx, self.ptr) + rews = np.append(self.rew_buf[path_slice], last_val) + vals = np.append(self.val_buf[path_slice], last_val) + + # the next two lines implement GAE-Lambda advantage calculation + deltas = rews[:-1] + self.gamma * vals[1:] - vals[:-1] + self.adv_buf[path_slice] = core.discount_cumsum(deltas, self.gamma * self.lam) + + # the next line computes rewards-to-go, to be targets for the value function + self.ret_buf[path_slice] = core.discount_cumsum(rews, self.gamma)[:-1] + + self.path_start_idx = self.ptr + + def get(self): + """ + Call this at the end of an epoch to get all of the data from + the buffer, with advantages appropriately normalized (shifted to have + mean zero and std one). Also, resets some pointers in the buffer. + """ + assert self.ptr == self.max_size # buffer has to be full before you can get + self.ptr, self.path_start_idx = 0, 0 + # the next two lines implement the advantage normalization trick + adv_mean, adv_std = mpi_statistics_scalar(self.adv_buf) + self.adv_buf = (self.adv_buf - adv_mean) / adv_std + data = dict(obs=self.obs_buf, act=self.act_buf, ret=self.ret_buf, + adv=self.adv_buf, logp=self.logp_buf) + return {k: torch.as_tensor(v, dtype=torch.float32) for k,v in data.items()} + + + +def ppo(env_fn, actor_critic=core.MLPActorCritic, ac_kwargs=dict(), seed=0, + steps_per_epoch=4000, epochs=50, gamma=0.99, clip_ratio=0.2, pi_lr=3e-4, + vf_lr=1e-3, train_pi_iters=80, train_v_iters=80, lam=0.97, max_ep_len=1000, + target_kl=0.01, logger_kwargs=dict(), save_freq=10): + """ + Proximal Policy Optimization (by clipping), + + with early stopping based on approximate KL + + Args: + env_fn : A function which creates a copy of the environment. + The environment must satisfy the OpenAI Gym API. + + actor_critic: The constructor method for a PyTorch Module with a + ``step`` method, an ``act`` method, a ``pi`` module, and a ``v`` + module. The ``step`` method should accept a batch of observations + and return: + + =========== ================ ====================================== + Symbol Shape Description + =========== ================ ====================================== + ``a`` (batch, act_dim) | Numpy array of actions for each + | observation. + ``v`` (batch,) | Numpy array of value estimates + | for the provided observations. + ``logp_a`` (batch,) | Numpy array of log probs for the + | actions in ``a``. + =========== ================ ====================================== + + The ``act`` method behaves the same as ``step`` but only returns ``a``. + + The ``pi`` module's forward call should accept a batch of + observations and optionally a batch of actions, and return: + + =========== ================ ====================================== + Symbol Shape Description + =========== ================ ====================================== + ``pi`` N/A | Torch Distribution object, containing + | a batch of distributions describing + | the policy for the provided observations. + ``logp_a`` (batch,) | Optional (only returned if batch of + | actions is given). Tensor containing + | the log probability, according to + | the policy, of the provided actions. + | If actions not given, will contain + | ``None``. 
+ =========== ================ ====================================== + + The ``v`` module's forward call should accept a batch of observations + and return: + + =========== ================ ====================================== + Symbol Shape Description + =========== ================ ====================================== + ``v`` (batch,) | Tensor containing the value estimates + | for the provided observations. (Critical: + | make sure to flatten this!) + =========== ================ ====================================== + + + ac_kwargs (dict): Any kwargs appropriate for the ActorCritic object + you provided to PPO. + + seed (int): Seed for random number generators. + + steps_per_epoch (int): Number of steps of interaction (state-action pairs) + for the agent and the environment in each epoch. + + epochs (int): Number of epochs of interaction (equivalent to + number of policy updates) to perform. + + gamma (float): Discount factor. (Always between 0 and 1.) + + clip_ratio (float): Hyperparameter for clipping in the policy objective. + Roughly: how far can the new policy go from the old policy while + still profiting (improving the objective function)? The new policy + can still go farther than the clip_ratio says, but it doesn't help + on the objective anymore. (Usually small, 0.1 to 0.3.) Typically + denoted by :math:`\epsilon`. + + pi_lr (float): Learning rate for policy optimizer. + + vf_lr (float): Learning rate for value function optimizer. + + train_pi_iters (int): Maximum number of gradient descent steps to take + on policy loss per epoch. (Early stopping may cause optimizer + to take fewer than this.) + + train_v_iters (int): Number of gradient descent steps to take on + value function per epoch. + + lam (float): Lambda for GAE-Lambda. (Always between 0 and 1, + close to 1.) + + max_ep_len (int): Maximum length of trajectory / episode / rollout. + + target_kl (float): Roughly what KL divergence we think is appropriate + between new and old policies after an update. This will get used + for early stopping. (Usually small, 0.01 or 0.05.) + + logger_kwargs (dict): Keyword args for EpochLogger. + + save_freq (int): How often (in terms of gap between epochs) to save + the current policy and value function. + + """ + + # Special function to avoid certain slowdowns from PyTorch + MPI combo. 
+ setup_pytorch_for_mpi() + + # Set up logger and save configuration + logger = EpochLogger(**logger_kwargs) + logger.save_config(locals()) + + # Random seed + seed += 10000 * proc_id() + torch.manual_seed(seed) + np.random.seed(seed) + + # Instantiate environment + env = env_fn() + obs_dim = env.observation_space.shape + act_dim = env.action_space.shape + + # Create actor-critic module + ac = actor_critic(env.observation_space, env.action_space, **ac_kwargs) + + # Sync params across processes + sync_params(ac) + + # Count variables + var_counts = tuple(core.count_vars(module) for module in [ac.pi, ac.v]) + logger.log('\nNumber of parameters: \t pi: %d, \t v: %d\n'%var_counts) + + # Set up experience buffer + local_steps_per_epoch = int(steps_per_epoch / num_procs()) + buf = PPOBuffer(obs_dim, act_dim, local_steps_per_epoch, gamma, lam) + + # Set up function for computing PPO policy loss + def compute_loss_pi(data): + obs, act, adv, logp_old = data['obs'], data['act'], data['adv'], data['logp'] + + # Policy loss + pi, logp = ac.pi(obs, act) + ratio = torch.exp(logp - logp_old) + clip_adv = torch.clamp(ratio, 1-clip_ratio, 1+clip_ratio) * adv + loss_pi = -(torch.min(ratio * adv, clip_adv)).mean() + + # Useful extra info + approx_kl = (logp_old - logp).mean().item() + ent = pi.entropy().mean().item() + clipped = ratio.gt(1+clip_ratio) | ratio.lt(1-clip_ratio) + clipfrac = torch.as_tensor(clipped, dtype=torch.float32).mean().item() + pi_info = dict(kl=approx_kl, ent=ent, cf=clipfrac) + + return loss_pi, pi_info + + # Set up function for computing value loss + def compute_loss_v(data): + obs, ret = data['obs'], data['ret'] + return ((ac.v(obs) - ret)**2).mean() + + # Set up optimizers for policy and value function + pi_optimizer = Adam(ac.pi.parameters(), lr=pi_lr) + vf_optimizer = Adam(ac.v.parameters(), lr=vf_lr) + + # Set up model saving + logger.setup_pytorch_saver(ac) + + def update(): + data = buf.get() + + pi_l_old, pi_info_old = compute_loss_pi(data) + pi_l_old = pi_l_old.item() + v_l_old = compute_loss_v(data).item() + + # Train policy with multiple steps of gradient descent + for i in range(train_pi_iters): + pi_optimizer.zero_grad() + loss_pi, pi_info = compute_loss_pi(data) + kl = mpi_avg(pi_info['kl']) + if kl > 1.5 * target_kl: + logger.log('Early stopping at step %d due to reaching max kl.'%i) + break + loss_pi.backward() + mpi_avg_grads(ac.pi) # average grads across MPI processes + pi_optimizer.step() + + logger.store(StopIter=i) + + # Value function learning + for i in range(train_v_iters): + vf_optimizer.zero_grad() + loss_v = compute_loss_v(data) + loss_v.backward() + mpi_avg_grads(ac.v) # average grads across MPI processes + vf_optimizer.step() + + # Log changes from update + kl, ent, cf = pi_info['kl'], pi_info_old['ent'], pi_info['cf'] + logger.store(LossPi=pi_l_old, LossV=v_l_old, + KL=kl, Entropy=ent, ClipFrac=cf, + DeltaLossPi=(loss_pi.item() - pi_l_old), + DeltaLossV=(loss_v.item() - v_l_old)) + + # Prepare for interaction with environment + start_time = time.time() + o, ep_ret, ep_len = env.reset(), 0, 0 + + # Main loop: collect experience in env and update/log each epoch + for epoch in range(epochs): + for t in range(local_steps_per_epoch): + a, v, logp = ac.step(torch.as_tensor(o, dtype=torch.float32)) + + next_o, r, d, _ = env.step(a) + ep_ret += r + ep_len += 1 + + # save and log + buf.store(o, a, r, v, logp) + logger.store(VVals=v) + + # Update obs (critical!) 
+ o = next_o + + timeout = ep_len == max_ep_len + terminal = d or timeout + epoch_ended = t==local_steps_per_epoch-1 + + if terminal or epoch_ended: + if epoch_ended and not(terminal): + print('Warning: trajectory cut off by epoch at %d steps.'%ep_len, flush=True) + # if trajectory didn't reach terminal state, bootstrap value target + if timeout or epoch_ended: + _, v, _ = ac.step(torch.as_tensor(o, dtype=torch.float32)) + else: + v = 0 + buf.finish_path(v) + if terminal: + # only save EpRet / EpLen if trajectory finished + logger.store(EpRet=ep_ret, EpLen=ep_len) + o, ep_ret, ep_len = env.reset(), 0, 0 + + + # Save model + if (epoch % save_freq == 0) or (epoch == epochs-1): + logger.save_state({'env': env}, None) + + # Perform PPO update! + update() + + # Log info about epoch + logger.log_tabular('Epoch', epoch) + logger.log_tabular('EpRet', with_min_and_max=True) + logger.log_tabular('EpLen', average_only=True) + logger.log_tabular('VVals', with_min_and_max=True) + logger.log_tabular('TotalEnvInteracts', (epoch+1)*steps_per_epoch) + logger.log_tabular('LossPi', average_only=True) + logger.log_tabular('LossV', average_only=True) + logger.log_tabular('DeltaLossPi', average_only=True) + logger.log_tabular('DeltaLossV', average_only=True) + logger.log_tabular('Entropy', average_only=True) + logger.log_tabular('KL', average_only=True) + logger.log_tabular('ClipFrac', average_only=True) + logger.log_tabular('StopIter', average_only=True) + logger.log_tabular('Time', time.time()-start_time) + logger.dump_tabular() + +if __name__ == '__main__': + import argparse + parser = argparse.ArgumentParser() + parser.add_argument('--env', type=str, default='HalfCheetah-v2') + parser.add_argument('--hid', type=int, default=64) + parser.add_argument('--l', type=int, default=2) + parser.add_argument('--gamma', type=float, default=0.99) + parser.add_argument('--seed', '-s', type=int, default=0) + parser.add_argument('--cpu', type=int, default=4) + parser.add_argument('--steps', type=int, default=4000) + parser.add_argument('--epochs', type=int, default=50) + parser.add_argument('--exp_name', type=str, default='ppo') + args = parser.parse_args() + + mpi_fork(args.cpu) # run parallel code with mpi + + from spinup.utils.run_utils import setup_logger_kwargs + logger_kwargs = setup_logger_kwargs(args.exp_name, args.seed) + + ppo(lambda : gym.make(args.env), actor_critic=core.MLPActorCritic, + ac_kwargs=dict(hidden_sizes=[args.hid]*args.l), gamma=args.gamma, + seed=args.seed, steps_per_epoch=args.steps, epochs=args.epochs, + logger_kwargs=logger_kwargs) \ No newline at end of file diff --git a/spinup/algos/pytorch/sac/core.py b/spinup/algos/pytorch/sac/core.py new file mode 100644 index 000000000..6219346d1 --- /dev/null +++ b/spinup/algos/pytorch/sac/core.py @@ -0,0 +1,98 @@ +import numpy as np +import scipy.signal + +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.distributions.normal import Normal + + +def combined_shape(length, shape=None): + if shape is None: + return (length,) + return (length, shape) if np.isscalar(shape) else (length, *shape) + +def mlp(sizes, activation, output_activation=nn.Identity): + layers = [] + for j in range(len(sizes)-1): + act = activation if j < len(sizes)-2 else output_activation + layers += [nn.Linear(sizes[j], sizes[j+1]), act()] + return nn.Sequential(*layers) + +def count_vars(module): + return sum([np.prod(p.shape) for p in module.parameters()]) + + +LOG_STD_MAX = 2 +LOG_STD_MIN = -20 + +class SquashedGaussianMLPActor(nn.Module): + + def 
__init__(self, obs_dim, act_dim, hidden_sizes, activation, act_limit): + super().__init__() + self.net = mlp([obs_dim] + list(hidden_sizes), activation, activation) + self.mu_layer = nn.Linear(hidden_sizes[-1], act_dim) + self.log_std_layer = nn.Linear(hidden_sizes[-1], act_dim) + self.act_limit = act_limit + + def forward(self, obs, deterministic=False, with_logprob=True): + net_out = self.net(obs) + mu = self.mu_layer(net_out) + log_std = self.log_std_layer(net_out) + log_std = torch.clamp(log_std, LOG_STD_MIN, LOG_STD_MAX) + std = torch.exp(log_std) + + # Pre-squash distribution and sample + pi_distribution = Normal(mu, std) + if deterministic: + # Only used for evaluating policy at test time. + pi_action = mu + else: + pi_action = pi_distribution.rsample() + + if with_logprob: + # Compute logprob from Gaussian, and then apply correction for Tanh squashing. + # NOTE: The correction formula is a little bit magic. To get an understanding + # of where it comes from, check out the original SAC paper (arXiv 1801.01290) + # and look in appendix C. This is a more numerically-stable equivalent to Eq 21. + # Try deriving it yourself as a (very difficult) exercise. :) + logp_pi = pi_distribution.log_prob(pi_action).sum(axis=-1) + logp_pi -= (2*(np.log(2) - pi_action - F.softplus(-2*pi_action))).sum(axis=1) + else: + logp_pi = None + + pi_action = torch.tanh(pi_action) + pi_action = self.act_limit * pi_action + + return pi_action, logp_pi + + +class MLPQFunction(nn.Module): + + def __init__(self, obs_dim, act_dim, hidden_sizes, activation): + super().__init__() + self.q = mlp([obs_dim + act_dim] + list(hidden_sizes) + [1], activation) + + def forward(self, obs, act): + q = self.q(torch.cat([obs, act], dim=-1)) + return torch.squeeze(q, -1) # Critical to ensure q has right shape. + +class MLPActorCritic(nn.Module): + + def __init__(self, observation_space, action_space, hidden_sizes=(256,256), + activation=nn.ReLU): + super().__init__() + + obs_dim = observation_space.shape[0] + act_dim = action_space.shape[0] + act_limit = action_space.high[0] + + # build policy and value functions + self.pi = SquashedGaussianMLPActor(obs_dim, act_dim, hidden_sizes, activation, act_limit) + self.q1 = MLPQFunction(obs_dim, act_dim, hidden_sizes, activation) + self.q2 = MLPQFunction(obs_dim, act_dim, hidden_sizes, activation) + + def act(self, obs, deterministic=False): + with torch.no_grad(): + a, _ = self.pi(obs, deterministic, False) + return a.numpy() diff --git a/spinup/algos/pytorch/sac/sac.py b/spinup/algos/pytorch/sac/sac.py new file mode 100644 index 000000000..e463d39f2 --- /dev/null +++ b/spinup/algos/pytorch/sac/sac.py @@ -0,0 +1,370 @@ +from copy import deepcopy +import itertools +import numpy as np +import torch +from torch.optim import Adam +import gym +import time +import spinup.algos.pytorch.sac.core as core +from spinup.utils.logx import EpochLogger + + +class ReplayBuffer: + """ + A simple FIFO experience replay buffer for SAC agents. 
+ """ + + def __init__(self, obs_dim, act_dim, size): + self.obs_buf = np.zeros(core.combined_shape(size, obs_dim), dtype=np.float32) + self.obs2_buf = np.zeros(core.combined_shape(size, obs_dim), dtype=np.float32) + self.act_buf = np.zeros(core.combined_shape(size, act_dim), dtype=np.float32) + self.rew_buf = np.zeros(size, dtype=np.float32) + self.done_buf = np.zeros(size, dtype=np.float32) + self.ptr, self.size, self.max_size = 0, 0, size + + def store(self, obs, act, rew, next_obs, done): + self.obs_buf[self.ptr] = obs + self.obs2_buf[self.ptr] = next_obs + self.act_buf[self.ptr] = act + self.rew_buf[self.ptr] = rew + self.done_buf[self.ptr] = done + self.ptr = (self.ptr+1) % self.max_size + self.size = min(self.size+1, self.max_size) + + def sample_batch(self, batch_size=32): + idxs = np.random.randint(0, self.size, size=batch_size) + batch = dict(obs=self.obs_buf[idxs], + obs2=self.obs2_buf[idxs], + act=self.act_buf[idxs], + rew=self.rew_buf[idxs], + done=self.done_buf[idxs]) + return {k: torch.as_tensor(v, dtype=torch.float32) for k,v in batch.items()} + + + +def sac(env_fn, actor_critic=core.MLPActorCritic, ac_kwargs=dict(), seed=0, + steps_per_epoch=4000, epochs=100, replay_size=int(1e6), gamma=0.99, + polyak=0.995, lr=1e-3, alpha=0.2, batch_size=100, start_steps=10000, + update_after=1000, update_every=50, num_test_episodes=10, max_ep_len=1000, + logger_kwargs=dict(), save_freq=1): + """ + Soft Actor-Critic (SAC) + + + Args: + env_fn : A function which creates a copy of the environment. + The environment must satisfy the OpenAI Gym API. + + actor_critic: The constructor method for a PyTorch Module with an ``act`` + method, a ``pi`` module, a ``q1`` module, and a ``q2`` module. + The ``act`` method and ``pi`` module should accept batches of + observations as inputs, and ``q1`` and ``q2`` should accept a batch + of observations and a batch of actions as inputs. When called, + ``act``, ``q1``, and ``q2`` should return: + + =========== ================ ====================================== + Call Output Shape Description + =========== ================ ====================================== + ``act`` (batch, act_dim) | Numpy array of actions for each + | observation. + ``q1`` (batch,) | Tensor containing one current estimate + | of Q* for the provided observations + | and actions. (Critical: make sure to + | flatten this!) + ``q2`` (batch,) | Tensor containing the other current + | estimate of Q* for the provided observations + | and actions. (Critical: make sure to + | flatten this!) + =========== ================ ====================================== + + Calling ``pi`` should return: + + =========== ================ ====================================== + Symbol Shape Description + =========== ================ ====================================== + ``a`` (batch, act_dim) | Tensor containing actions from policy + | given observations. + ``logp_pi`` (batch,) | Tensor containing log probabilities of + | actions in ``a``. Importantly: gradients + | should be able to flow back into ``a``. + =========== ================ ====================================== + + ac_kwargs (dict): Any kwargs appropriate for the ActorCritic object + you provided to SAC. + + seed (int): Seed for random number generators. + + steps_per_epoch (int): Number of steps of interaction (state-action pairs) + for the agent and the environment in each epoch. + + epochs (int): Number of epochs to run and train agent. + + replay_size (int): Maximum length of replay buffer. + + gamma (float): Discount factor. 
(Always between 0 and 1.) + + polyak (float): Interpolation factor in polyak averaging for target + networks. Target networks are updated towards main networks + according to: + + .. math:: \\theta_{\\text{targ}} \\leftarrow + \\rho \\theta_{\\text{targ}} + (1-\\rho) \\theta + + where :math:`\\rho` is polyak. (Always between 0 and 1, usually + close to 1.) + + lr (float): Learning rate (used for both policy and value learning). + + alpha (float): Entropy regularization coefficient. (Equivalent to + inverse of reward scale in the original SAC paper.) + + batch_size (int): Minibatch size for SGD. + + start_steps (int): Number of steps for uniform-random action selection, + before running real policy. Helps exploration. + + update_after (int): Number of env interactions to collect before + starting to do gradient descent updates. Ensures replay buffer + is full enough for useful updates. + + update_every (int): Number of env interactions that should elapse + between gradient descent updates. Note: Regardless of how long + you wait between updates, the ratio of env steps to gradient steps + is locked to 1. + + num_test_episodes (int): Number of episodes to test the deterministic + policy at the end of each epoch. + + max_ep_len (int): Maximum length of trajectory / episode / rollout. + + logger_kwargs (dict): Keyword args for EpochLogger. + + save_freq (int): How often (in terms of gap between epochs) to save + the current policy and value function. + + """ + + logger = EpochLogger(**logger_kwargs) + logger.save_config(locals()) + + torch.manual_seed(seed) + np.random.seed(seed) + + env, test_env = env_fn(), env_fn() + obs_dim = env.observation_space.shape + act_dim = env.action_space.shape[0] + + # Action limit for clamping: critically, assumes all dimensions share the same bound! + act_limit = env.action_space.high[0] + + # Create actor-critic module and target networks + ac = actor_critic(env.observation_space, env.action_space, **ac_kwargs) + ac_targ = deepcopy(ac) + + # Freeze target networks with respect to optimizers (only update via polyak averaging) + for p in ac_targ.parameters(): + p.requires_grad = False + + # List of parameters for both Q-networks (save this for convenience) + q_params = itertools.chain(ac.q1.parameters(), ac.q2.parameters()) + + # Experience buffer + replay_buffer = ReplayBuffer(obs_dim=obs_dim, act_dim=act_dim, size=replay_size) + + # Count variables (protip: try to get a feel for how different size networks behave!) 
+ var_counts = tuple(core.count_vars(module) for module in [ac.pi, ac.q1, ac.q2]) + logger.log('\nNumber of parameters: \t pi: %d, \t q1: %d, \t q2: %d\n'%var_counts) + + # Set up function for computing SAC Q-losses + def compute_loss_q(data): + o, a, r, o2, d = data['obs'], data['act'], data['rew'], data['obs2'], data['done'] + + q1 = ac.q1(o,a) + q2 = ac.q2(o,a) + + # Bellman backup for Q functions + with torch.no_grad(): + # Target actions come from *current* policy + a2, logp_a2 = ac.pi(o2) + + # Target Q-values + q1_pi_targ = ac_targ.q1(o2, a2) + q2_pi_targ = ac_targ.q2(o2, a2) + q_pi_targ = torch.min(q1_pi_targ, q2_pi_targ) + backup = r + gamma * (1 - d) * (q_pi_targ - alpha * logp_a2) + + # MSE loss against Bellman backup + loss_q1 = ((q1 - backup)**2).mean() + loss_q2 = ((q2 - backup)**2).mean() + loss_q = loss_q1 + loss_q2 + + # Useful info for logging + q_info = dict(Q1Vals=q1.detach().numpy(), + Q2Vals=q2.detach().numpy()) + + return loss_q, q_info + + # Set up function for computing SAC pi loss + def compute_loss_pi(data): + o = data['obs'] + pi, logp_pi = ac.pi(o) + q1_pi = ac.q1(o, pi) + q2_pi = ac.q2(o, pi) + q_pi = torch.min(q1_pi, q2_pi) + + # Entropy-regularized policy loss + loss_pi = (alpha * logp_pi - q_pi).mean() + + # Useful info for logging + pi_info = dict(LogPi=logp_pi.detach().numpy()) + + return loss_pi, pi_info + + # Set up optimizers for policy and q-function + pi_optimizer = Adam(ac.pi.parameters(), lr=lr) + q_optimizer = Adam(q_params, lr=lr) + + # Set up model saving + logger.setup_pytorch_saver(ac) + + def update(data): + # First run one gradient descent step for Q1 and Q2 + q_optimizer.zero_grad() + loss_q, q_info = compute_loss_q(data) + loss_q.backward() + q_optimizer.step() + + # Record things + logger.store(LossQ=loss_q.item(), **q_info) + + # Freeze Q-networks so you don't waste computational effort + # computing gradients for them during the policy learning step. + for p in q_params: + p.requires_grad = False + + # Next run one gradient descent step for pi. + pi_optimizer.zero_grad() + loss_pi, pi_info = compute_loss_pi(data) + loss_pi.backward() + pi_optimizer.step() + + # Unfreeze Q-networks so you can optimize it at next DDPG step. + for p in q_params: + p.requires_grad = True + + # Record things + logger.store(LossPi=loss_pi.item(), **pi_info) + + # Finally, update target networks by polyak averaging. + with torch.no_grad(): + for p, p_targ in zip(ac.parameters(), ac_targ.parameters()): + # NB: We use an in-place operations "mul_", "add_" to update target + # params, as opposed to "mul" and "add", which would make new tensors. + p_targ.data.mul_(polyak) + p_targ.data.add_((1 - polyak) * p.data) + + def get_action(o, deterministic=False): + return ac.act(torch.as_tensor(o, dtype=torch.float32), + deterministic) + + def test_agent(): + for j in range(num_test_episodes): + o, d, ep_ret, ep_len = test_env.reset(), False, 0, 0 + while not(d or (ep_len == max_ep_len)): + # Take deterministic actions at test time + o, r, d, _ = test_env.step(get_action(o, True)) + ep_ret += r + ep_len += 1 + logger.store(TestEpRet=ep_ret, TestEpLen=ep_len) + + # Prepare for interaction with environment + total_steps = steps_per_epoch * epochs + start_time = time.time() + o, ep_ret, ep_len = env.reset(), 0, 0 + + # Main loop: collect experience in env and update/log each epoch + for t in range(total_steps): + + # Until start_steps have elapsed, randomly sample actions + # from a uniform distribution for better exploration. Afterwards, + # use the learned policy. 
+ if t > start_steps: + a = get_action(o) + else: + a = env.action_space.sample() + + # Step the env + o2, r, d, _ = env.step(a) + ep_ret += r + ep_len += 1 + + # Ignore the "done" signal if it comes from hitting the time + # horizon (that is, when it's an artificial terminal signal + # that isn't based on the agent's state) + d = False if ep_len==max_ep_len else d + + # Store experience to replay buffer + replay_buffer.store(o, a, r, o2, d) + + # Super critical, easy to overlook step: make sure to update + # most recent observation! + o = o2 + + # End of trajectory handling + if d or (ep_len == max_ep_len): + logger.store(EpRet=ep_ret, EpLen=ep_len) + o, ep_ret, ep_len = env.reset(), 0, 0 + + # Update handling + if t >= update_after and t % update_every == 0: + for j in range(update_every): + batch = replay_buffer.sample_batch(batch_size) + update(data=batch) + + # End of epoch handling + if (t+1) % steps_per_epoch == 0: + epoch = (t+1) // steps_per_epoch + + # Save model + if (epoch % save_freq == 0) or (epoch == epochs): + logger.save_state({'env': env}, None) + + # Test the performance of the deterministic version of the agent. + test_agent() + + # Log info about epoch + logger.log_tabular('Epoch', epoch) + logger.log_tabular('EpRet', with_min_and_max=True) + logger.log_tabular('TestEpRet', with_min_and_max=True) + logger.log_tabular('EpLen', average_only=True) + logger.log_tabular('TestEpLen', average_only=True) + logger.log_tabular('TotalEnvInteracts', t) + logger.log_tabular('Q1Vals', with_min_and_max=True) + logger.log_tabular('Q2Vals', with_min_and_max=True) + logger.log_tabular('LogPi', with_min_and_max=True) + logger.log_tabular('LossPi', average_only=True) + logger.log_tabular('LossQ', average_only=True) + logger.log_tabular('Time', time.time()-start_time) + logger.dump_tabular() + +if __name__ == '__main__': + import argparse + parser = argparse.ArgumentParser() + parser.add_argument('--env', type=str, default='HalfCheetah-v2') + parser.add_argument('--hid', type=int, default=256) + parser.add_argument('--l', type=int, default=2) + parser.add_argument('--gamma', type=float, default=0.99) + parser.add_argument('--seed', '-s', type=int, default=0) + parser.add_argument('--epochs', type=int, default=50) + parser.add_argument('--exp_name', type=str, default='sac') + args = parser.parse_args() + + from spinup.utils.run_utils import setup_logger_kwargs + logger_kwargs = setup_logger_kwargs(args.exp_name, args.seed) + + torch.set_num_threads(torch.get_num_threads()) + + sac(lambda : gym.make(args.env), actor_critic=core.MLPActorCritic, + ac_kwargs=dict(hidden_sizes=[args.hid]*args.l), + gamma=args.gamma, seed=args.seed, epochs=args.epochs, + logger_kwargs=logger_kwargs) diff --git a/spinup/algos/pytorch/td3/core.py b/spinup/algos/pytorch/td3/core.py new file mode 100644 index 000000000..acdc55aa2 --- /dev/null +++ b/spinup/algos/pytorch/td3/core.py @@ -0,0 +1,62 @@ +import numpy as np +import scipy.signal + +import torch +import torch.nn as nn + + +def combined_shape(length, shape=None): + if shape is None: + return (length,) + return (length, shape) if np.isscalar(shape) else (length, *shape) + +def mlp(sizes, activation, output_activation=nn.Identity): + layers = [] + for j in range(len(sizes)-1): + act = activation if j < len(sizes)-2 else output_activation + layers += [nn.Linear(sizes[j], sizes[j+1]), act()] + return nn.Sequential(*layers) + +def count_vars(module): + return sum([np.prod(p.shape) for p in module.parameters()]) + +class MLPActor(nn.Module): + + def __init__(self, 
obs_dim, act_dim, hidden_sizes, activation, act_limit): + super().__init__() + pi_sizes = [obs_dim] + list(hidden_sizes) + [act_dim] + self.pi = mlp(pi_sizes, activation, nn.Tanh) + self.act_limit = act_limit + + def forward(self, obs): + # Return output from network scaled to action space limits. + return self.act_limit * self.pi(obs) + +class MLPQFunction(nn.Module): + + def __init__(self, obs_dim, act_dim, hidden_sizes, activation): + super().__init__() + self.q = mlp([obs_dim + act_dim] + list(hidden_sizes) + [1], activation) + + def forward(self, obs, act): + q = self.q(torch.cat([obs, act], dim=-1)) + return torch.squeeze(q, -1) # Critical to ensure q has right shape. + +class MLPActorCritic(nn.Module): + + def __init__(self, observation_space, action_space, hidden_sizes=(256,256), + activation=nn.ReLU): + super().__init__() + + obs_dim = observation_space.shape[0] + act_dim = action_space.shape[0] + act_limit = action_space.high[0] + + # build policy and value functions + self.pi = MLPActor(obs_dim, act_dim, hidden_sizes, activation, act_limit) + self.q1 = MLPQFunction(obs_dim, act_dim, hidden_sizes, activation) + self.q2 = MLPQFunction(obs_dim, act_dim, hidden_sizes, activation) + + def act(self, obs): + with torch.no_grad(): + return self.pi(obs).numpy() diff --git a/spinup/algos/pytorch/td3/td3.py b/spinup/algos/pytorch/td3/td3.py new file mode 100644 index 000000000..2d59875c2 --- /dev/null +++ b/spinup/algos/pytorch/td3/td3.py @@ -0,0 +1,368 @@ +from copy import deepcopy +import itertools +import numpy as np +import torch +from torch.optim import Adam +import gym +import time +import spinup.algos.pytorch.td3.core as core +from spinup.utils.logx import EpochLogger + + +class ReplayBuffer: + """ + A simple FIFO experience replay buffer for TD3 agents. + """ + + def __init__(self, obs_dim, act_dim, size): + self.obs_buf = np.zeros(core.combined_shape(size, obs_dim), dtype=np.float32) + self.obs2_buf = np.zeros(core.combined_shape(size, obs_dim), dtype=np.float32) + self.act_buf = np.zeros(core.combined_shape(size, act_dim), dtype=np.float32) + self.rew_buf = np.zeros(size, dtype=np.float32) + self.done_buf = np.zeros(size, dtype=np.float32) + self.ptr, self.size, self.max_size = 0, 0, size + + def store(self, obs, act, rew, next_obs, done): + self.obs_buf[self.ptr] = obs + self.obs2_buf[self.ptr] = next_obs + self.act_buf[self.ptr] = act + self.rew_buf[self.ptr] = rew + self.done_buf[self.ptr] = done + self.ptr = (self.ptr+1) % self.max_size + self.size = min(self.size+1, self.max_size) + + def sample_batch(self, batch_size=32): + idxs = np.random.randint(0, self.size, size=batch_size) + batch = dict(obs=self.obs_buf[idxs], + obs2=self.obs2_buf[idxs], + act=self.act_buf[idxs], + rew=self.rew_buf[idxs], + done=self.done_buf[idxs]) + return {k: torch.as_tensor(v, dtype=torch.float32) for k,v in batch.items()} + + + +def td3(env_fn, actor_critic=core.MLPActorCritic, ac_kwargs=dict(), seed=0, + steps_per_epoch=4000, epochs=100, replay_size=int(1e6), gamma=0.99, + polyak=0.995, pi_lr=1e-3, q_lr=1e-3, batch_size=100, start_steps=10000, + update_after=1000, update_every=50, act_noise=0.1, target_noise=0.2, + noise_clip=0.5, policy_delay=2, num_test_episodes=10, max_ep_len=1000, + logger_kwargs=dict(), save_freq=1): + """ + Twin Delayed Deep Deterministic Policy Gradient (TD3) + + + Args: + env_fn : A function which creates a copy of the environment. + The environment must satisfy the OpenAI Gym API. 
+ + actor_critic: The constructor method for a PyTorch Module with an ``act`` + method, a ``pi`` module, a ``q1`` module, and a ``q2`` module. + The ``act`` method and ``pi`` module should accept batches of + observations as inputs, and ``q1`` and ``q2`` should accept a batch + of observations and a batch of actions as inputs. When called, + these should return: + + =========== ================ ====================================== + Call Output Shape Description + =========== ================ ====================================== + ``act`` (batch, act_dim) | Numpy array of actions for each + | observation. + ``pi`` (batch, act_dim) | Tensor containing actions from policy + | given observations. + ``q1`` (batch,) | Tensor containing one current estimate + | of Q* for the provided observations + | and actions. (Critical: make sure to + | flatten this!) + ``q2`` (batch,) | Tensor containing the other current + | estimate of Q* for the provided observations + | and actions. (Critical: make sure to + | flatten this!) + =========== ================ ====================================== + + ac_kwargs (dict): Any kwargs appropriate for the ActorCritic object + you provided to TD3. + + seed (int): Seed for random number generators. + + steps_per_epoch (int): Number of steps of interaction (state-action pairs) + for the agent and the environment in each epoch. + + epochs (int): Number of epochs to run and train agent. + + replay_size (int): Maximum length of replay buffer. + + gamma (float): Discount factor. (Always between 0 and 1.) + + polyak (float): Interpolation factor in polyak averaging for target + networks. Target networks are updated towards main networks + according to: + + .. math:: \\theta_{\\text{targ}} \\leftarrow + \\rho \\theta_{\\text{targ}} + (1-\\rho) \\theta + + where :math:`\\rho` is polyak. (Always between 0 and 1, usually + close to 1.) + + pi_lr (float): Learning rate for policy. + + q_lr (float): Learning rate for Q-networks. + + batch_size (int): Minibatch size for SGD. + + start_steps (int): Number of steps for uniform-random action selection, + before running real policy. Helps exploration. + + update_after (int): Number of env interactions to collect before + starting to do gradient descent updates. Ensures replay buffer + is full enough for useful updates. + + update_every (int): Number of env interactions that should elapse + between gradient descent updates. Note: Regardless of how long + you wait between updates, the ratio of env steps to gradient steps + is locked to 1. + + act_noise (float): Stddev for Gaussian exploration noise added to + policy at training time. (At test time, no noise is added.) + + target_noise (float): Stddev for smoothing noise added to target + policy. + + noise_clip (float): Limit for absolute value of target policy + smoothing noise. + + policy_delay (int): Policy will only be updated once every + policy_delay times for each update of the Q-networks. + + num_test_episodes (int): Number of episodes to test the deterministic + policy at the end of each epoch. + + max_ep_len (int): Maximum length of trajectory / episode / rollout. + + logger_kwargs (dict): Keyword args for EpochLogger. + + save_freq (int): How often (in terms of gap between epochs) to save + the current policy and value function. 
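+
+        (For a sense of scale on the ``polyak`` default: with
+        :math:`\\rho = 0.995`, each update moves every target parameter only
+        0.5% of the way toward its main-network counterpart, so the target
+        network behaves roughly like an exponential moving average with a
+        time constant of about :math:`1/(1-\\rho) = 200` updates.)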
+ + """ + + logger = EpochLogger(**logger_kwargs) + logger.save_config(locals()) + + torch.manual_seed(seed) + np.random.seed(seed) + + env, test_env = env_fn(), env_fn() + obs_dim = env.observation_space.shape + act_dim = env.action_space.shape[0] + + # Action limit for clamping: critically, assumes all dimensions share the same bound! + act_limit = env.action_space.high[0] + + # Create actor-critic module and target networks + ac = actor_critic(env.observation_space, env.action_space, **ac_kwargs) + ac_targ = deepcopy(ac) + + # Freeze target networks with respect to optimizers (only update via polyak averaging) + for p in ac_targ.parameters(): + p.requires_grad = False + + # List of parameters for both Q-networks (save this for convenience) + q_params = itertools.chain(ac.q1.parameters(), ac.q2.parameters()) + + # Experience buffer + replay_buffer = ReplayBuffer(obs_dim=obs_dim, act_dim=act_dim, size=replay_size) + + # Count variables (protip: try to get a feel for how different size networks behave!) + var_counts = tuple(core.count_vars(module) for module in [ac.pi, ac.q1, ac.q2]) + logger.log('\nNumber of parameters: \t pi: %d, \t q1: %d, \t q2: %d\n'%var_counts) + + # Set up function for computing TD3 Q-losses + def compute_loss_q(data): + o, a, r, o2, d = data['obs'], data['act'], data['rew'], data['obs2'], data['done'] + + q1 = ac.q1(o,a) + q2 = ac.q2(o,a) + + # Bellman backup for Q functions + with torch.no_grad(): + pi_targ = ac_targ.pi(o2) + + # Target policy smoothing + epsilon = torch.randn_like(pi_targ) * target_noise + epsilon = torch.clamp(epsilon, -noise_clip, noise_clip) + a2 = pi_targ + epsilon + a2 = torch.clamp(a2, -act_limit, act_limit) + + # Target Q-values + q1_pi_targ = ac_targ.q1(o2, a2) + q2_pi_targ = ac_targ.q2(o2, a2) + q_pi_targ = torch.min(q1_pi_targ, q2_pi_targ) + backup = r + gamma * (1 - d) * q_pi_targ + + # MSE loss against Bellman backup + loss_q1 = ((q1 - backup)**2).mean() + loss_q2 = ((q2 - backup)**2).mean() + loss_q = loss_q1 + loss_q2 + + # Useful info for logging + loss_info = dict(Q1Vals=q1.detach().numpy(), + Q2Vals=q2.detach().numpy()) + + return loss_q, loss_info + + # Set up function for computing TD3 pi loss + def compute_loss_pi(data): + o = data['obs'] + q1_pi = ac.q1(o, ac.pi(o)) + return -q1_pi.mean() + + # Set up optimizers for policy and q-function + pi_optimizer = Adam(ac.pi.parameters(), lr=pi_lr) + q_optimizer = Adam(q_params, lr=q_lr) + + # Set up model saving + logger.setup_pytorch_saver(ac) + + def update(data, timer): + # First run one gradient descent step for Q1 and Q2 + q_optimizer.zero_grad() + loss_q, loss_info = compute_loss_q(data) + loss_q.backward() + q_optimizer.step() + + # Record things + logger.store(LossQ=loss_q.item(), **loss_info) + + # Possibly update pi and target networks + if timer % policy_delay == 0: + + # Freeze Q-networks so you don't waste computational effort + # computing gradients for them during the policy learning step. + for p in q_params: + p.requires_grad = False + + # Next run one gradient descent step for pi. + pi_optimizer.zero_grad() + loss_pi = compute_loss_pi(data) + loss_pi.backward() + pi_optimizer.step() + + # Unfreeze Q-networks so you can optimize it at next DDPG step. + for p in q_params: + p.requires_grad = True + + # Record things + logger.store(LossPi=loss_pi.item()) + + # Finally, update target networks by polyak averaging. 
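+        # (This is a plain parameter blend, not a gradient step: the target
+        # parameters were frozen with requires_grad=False above, and the
+        # update runs under torch.no_grad() below.)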
+ with torch.no_grad(): + for p, p_targ in zip(ac.parameters(), ac_targ.parameters()): + # NB: We use an in-place operations "mul_", "add_" to update target + # params, as opposed to "mul" and "add", which would make new tensors. + p_targ.data.mul_(polyak) + p_targ.data.add_((1 - polyak) * p.data) + + def get_action(o, noise_scale): + a = ac.act(torch.as_tensor(o, dtype=torch.float32)) + a += noise_scale * np.random.randn(act_dim) + return np.clip(a, -act_limit, act_limit) + + def test_agent(): + for j in range(num_test_episodes): + o, d, ep_ret, ep_len = test_env.reset(), False, 0, 0 + while not(d or (ep_len == max_ep_len)): + # Take deterministic actions at test time (noise_scale=0) + o, r, d, _ = test_env.step(get_action(o, 0)) + ep_ret += r + ep_len += 1 + logger.store(TestEpRet=ep_ret, TestEpLen=ep_len) + + # Prepare for interaction with environment + total_steps = steps_per_epoch * epochs + start_time = time.time() + o, ep_ret, ep_len = env.reset(), 0, 0 + + # Main loop: collect experience in env and update/log each epoch + for t in range(total_steps): + + # Until start_steps have elapsed, randomly sample actions + # from a uniform distribution for better exploration. Afterwards, + # use the learned policy (with some noise, via act_noise). + if t > start_steps: + a = get_action(o, act_noise) + else: + a = env.action_space.sample() + + # Step the env + o2, r, d, _ = env.step(a) + ep_ret += r + ep_len += 1 + + # Ignore the "done" signal if it comes from hitting the time + # horizon (that is, when it's an artificial terminal signal + # that isn't based on the agent's state) + d = False if ep_len==max_ep_len else d + + # Store experience to replay buffer + replay_buffer.store(o, a, r, o2, d) + + # Super critical, easy to overlook step: make sure to update + # most recent observation! + o = o2 + + # End of trajectory handling + if d or (ep_len == max_ep_len): + logger.store(EpRet=ep_ret, EpLen=ep_len) + o, ep_ret, ep_len = env.reset(), 0, 0 + + # Update handling + if t >= update_after and t % update_every == 0: + for j in range(update_every): + batch = replay_buffer.sample_batch(batch_size) + update(data=batch, timer=j) + + # End of epoch handling + if (t+1) % steps_per_epoch == 0: + epoch = (t+1) // steps_per_epoch + + # Save model + if (epoch % save_freq == 0) or (epoch == epochs): + logger.save_state({'env': env}, None) + + # Test the performance of the deterministic version of the agent. 
+ test_agent() + + # Log info about epoch + logger.log_tabular('Epoch', epoch) + logger.log_tabular('EpRet', with_min_and_max=True) + logger.log_tabular('TestEpRet', with_min_and_max=True) + logger.log_tabular('EpLen', average_only=True) + logger.log_tabular('TestEpLen', average_only=True) + logger.log_tabular('TotalEnvInteracts', t) + logger.log_tabular('Q1Vals', with_min_and_max=True) + logger.log_tabular('Q2Vals', with_min_and_max=True) + logger.log_tabular('LossPi', average_only=True) + logger.log_tabular('LossQ', average_only=True) + logger.log_tabular('Time', time.time()-start_time) + logger.dump_tabular() + +if __name__ == '__main__': + import argparse + parser = argparse.ArgumentParser() + parser.add_argument('--env', type=str, default='HalfCheetah-v2') + parser.add_argument('--hid', type=int, default=256) + parser.add_argument('--l', type=int, default=2) + parser.add_argument('--gamma', type=float, default=0.99) + parser.add_argument('--seed', '-s', type=int, default=0) + parser.add_argument('--epochs', type=int, default=50) + parser.add_argument('--exp_name', type=str, default='td3') + args = parser.parse_args() + + from spinup.utils.run_utils import setup_logger_kwargs + logger_kwargs = setup_logger_kwargs(args.exp_name, args.seed) + + td3(lambda : gym.make(args.env), actor_critic=core.MLPActorCritic, + ac_kwargs=dict(hidden_sizes=[args.hid]*args.l), + gamma=args.gamma, seed=args.seed, epochs=args.epochs, + logger_kwargs=logger_kwargs) diff --git a/spinup/algos/pytorch/trpo/trpo.py b/spinup/algos/pytorch/trpo/trpo.py new file mode 100644 index 000000000..eeaf7d034 --- /dev/null +++ b/spinup/algos/pytorch/trpo/trpo.py @@ -0,0 +1,4 @@ +def trpo(*args, **kwargs): + print('\n\nUnfortunately, TRPO has not yet been implemented in PyTorch '\ + + 'for Spinning Up. TRPO will migrate some time in the future.\n\n') + raise NotImplementedError \ No newline at end of file diff --git a/spinup/algos/pytorch/vpg/core.py b/spinup/algos/pytorch/vpg/core.py new file mode 100644 index 000000000..84e9f9889 --- /dev/null +++ b/spinup/algos/pytorch/vpg/core.py @@ -0,0 +1,135 @@ +import numpy as np +import scipy.signal +from gym.spaces import Box, Discrete + +import torch +import torch.nn as nn +from torch.distributions.normal import Normal +from torch.distributions.categorical import Categorical + + +def combined_shape(length, shape=None): + if shape is None: + return (length,) + return (length, shape) if np.isscalar(shape) else (length, *shape) + + +def mlp(sizes, activation, output_activation=nn.Identity): + layers = [] + for j in range(len(sizes)-1): + act = activation if j < len(sizes)-2 else output_activation + layers += [nn.Linear(sizes[j], sizes[j+1]), act()] + return nn.Sequential(*layers) + + +def count_vars(module): + return sum([np.prod(p.shape) for p in module.parameters()]) + + +def discount_cumsum(x, discount): + """ + magic from rllab for computing discounted cumulative sums of vectors. + + input: + vector x, + [x0, + x1, + x2] + + output: + [x0 + discount * x1 + discount^2 * x2, + x1 + discount * x2, + x2] + """ + return scipy.signal.lfilter([1], [1, float(-discount)], x[::-1], axis=0)[::-1] + + +class Actor(nn.Module): + + def _distribution(self, obs): + raise NotImplementedError + + def _log_prob_from_distribution(self, pi, act): + raise NotImplementedError + + def forward(self, obs, act=None): + # Produce action distributions for given observations, and + # optionally compute the log likelihood of given actions under + # those distributions. 
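+        # Hypothetical usage (the names here are illustrative only):
+        #   pi, logp_a = actor(obs_batch, act_batch)  # logp_a is a (batch,) tensor
+        #   pi, logp_a = actor(obs_batch)             # logp_a comes back as None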
+ pi = self._distribution(obs) + logp_a = None + if act is not None: + logp_a = self._log_prob_from_distribution(pi, act) + return pi, logp_a + + +class MLPCategoricalActor(Actor): + + def __init__(self, obs_dim, act_dim, hidden_sizes, activation): + super().__init__() + self.logits_net = mlp([obs_dim] + list(hidden_sizes) + [act_dim], activation) + + def _distribution(self, obs): + logits = self.logits_net(obs) + return Categorical(logits=logits) + + def _log_prob_from_distribution(self, pi, act): + return pi.log_prob(act) + + +class MLPGaussianActor(Actor): + + def __init__(self, obs_dim, act_dim, hidden_sizes, activation): + super().__init__() + log_std = -0.5 * np.ones(act_dim, dtype=np.float32) + self.log_std = torch.nn.Parameter(torch.as_tensor(log_std)) + self.mu_net = mlp([obs_dim] + list(hidden_sizes) + [act_dim], activation) + + def _distribution(self, obs): + mu = self.mu_net(obs) + std = torch.exp(self.log_std) + return Normal(mu, std) + + def _log_prob_from_distribution(self, pi, act): + return pi.log_prob(act).sum(axis=-1) # Last axis sum needed for Torch Normal distribution + + +class MLPCritic(nn.Module): + + def __init__(self, obs_dim, hidden_sizes, activation): + super().__init__() + self.v_net = mlp([obs_dim] + list(hidden_sizes) + [1], activation) + + def forward(self, obs): + return torch.squeeze(self.v_net(obs), -1) # Critical to ensure v has right shape. + + + +class MLPActorCritic(nn.Module): + + + def __init__(self, observation_space, action_space, + hidden_sizes=(64,64), activation=nn.Tanh): + super().__init__() + + obs_dim = observation_space.shape[0] + + # policy builder depends on action space + if isinstance(action_space, Box): + self.pi = MLPGaussianActor(obs_dim, action_space.shape[0], hidden_sizes, activation) + elif isinstance(action_space, Discrete): + self.pi = MLPCategoricalActor(obs_dim, action_space.n, hidden_sizes, activation) + + # build value function + self.v = MLPCritic(obs_dim, hidden_sizes, activation) + + def step(self, obs): + with torch.no_grad(): + pi = self.pi._distribution(obs) + a = pi.sample() + logp_a = self.pi._log_prob_from_distribution(pi, a) + v = self.v(obs) + return a.numpy(), v.numpy(), logp_a.numpy() + + def act(self, obs): + return self.step(obs)[0] \ No newline at end of file diff --git a/spinup/algos/pytorch/vpg/vpg.py b/spinup/algos/pytorch/vpg/vpg.py new file mode 100644 index 000000000..4639b56ae --- /dev/null +++ b/spinup/algos/pytorch/vpg/vpg.py @@ -0,0 +1,350 @@ +import numpy as np +import torch +from torch.optim import Adam +import gym +import time +import spinup.algos.pytorch.vpg.core as core +from spinup.utils.logx import EpochLogger +from spinup.utils.mpi_pytorch import setup_pytorch_for_mpi, sync_params, mpi_avg_grads +from spinup.utils.mpi_tools import mpi_fork, mpi_avg, proc_id, mpi_statistics_scalar, num_procs + + +class VPGBuffer: + """ + A buffer for storing trajectories experienced by a VPG agent interacting + with the environment, and using Generalized Advantage Estimation (GAE-Lambda) + for calculating the advantages of state-action pairs. 
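+
+    Concretely, ``finish_path`` below forms the TD residuals
+
+        delta_t = r_t + gamma * V(s_{t+1}) - V(s_t)
+
+    and computes each advantage as their (gamma * lam)-discounted cumulative
+    sum, A_t = sum_l (gamma * lam)^l * delta_{t+l}, via ``core.discount_cumsum``.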
+ """ + + def __init__(self, obs_dim, act_dim, size, gamma=0.99, lam=0.95): + self.obs_buf = np.zeros(core.combined_shape(size, obs_dim), dtype=np.float32) + self.act_buf = np.zeros(core.combined_shape(size, act_dim), dtype=np.float32) + self.adv_buf = np.zeros(size, dtype=np.float32) + self.rew_buf = np.zeros(size, dtype=np.float32) + self.ret_buf = np.zeros(size, dtype=np.float32) + self.val_buf = np.zeros(size, dtype=np.float32) + self.logp_buf = np.zeros(size, dtype=np.float32) + self.gamma, self.lam = gamma, lam + self.ptr, self.path_start_idx, self.max_size = 0, 0, size + + def store(self, obs, act, rew, val, logp): + """ + Append one timestep of agent-environment interaction to the buffer. + """ + assert self.ptr < self.max_size # buffer has to have room so you can store + self.obs_buf[self.ptr] = obs + self.act_buf[self.ptr] = act + self.rew_buf[self.ptr] = rew + self.val_buf[self.ptr] = val + self.logp_buf[self.ptr] = logp + self.ptr += 1 + + def finish_path(self, last_val=0): + """ + Call this at the end of a trajectory, or when one gets cut off + by an epoch ending. This looks back in the buffer to where the + trajectory started, and uses rewards and value estimates from + the whole trajectory to compute advantage estimates with GAE-Lambda, + as well as compute the rewards-to-go for each state, to use as + the targets for the value function. + + The "last_val" argument should be 0 if the trajectory ended + because the agent reached a terminal state (died), and otherwise + should be V(s_T), the value function estimated for the last state. + This allows us to bootstrap the reward-to-go calculation to account + for timesteps beyond the arbitrary episode horizon (or epoch cutoff). + """ + + path_slice = slice(self.path_start_idx, self.ptr) + rews = np.append(self.rew_buf[path_slice], last_val) + vals = np.append(self.val_buf[path_slice], last_val) + + # the next two lines implement GAE-Lambda advantage calculation + deltas = rews[:-1] + self.gamma * vals[1:] - vals[:-1] + self.adv_buf[path_slice] = core.discount_cumsum(deltas, self.gamma * self.lam) + + # the next line computes rewards-to-go, to be targets for the value function + self.ret_buf[path_slice] = core.discount_cumsum(rews, self.gamma)[:-1] + + self.path_start_idx = self.ptr + + def get(self): + """ + Call this at the end of an epoch to get all of the data from + the buffer, with advantages appropriately normalized (shifted to have + mean zero and std one). Also, resets some pointers in the buffer. + """ + assert self.ptr == self.max_size # buffer has to be full before you can get + self.ptr, self.path_start_idx = 0, 0 + # the next two lines implement the advantage normalization trick + adv_mean, adv_std = mpi_statistics_scalar(self.adv_buf) + self.adv_buf = (self.adv_buf - adv_mean) / adv_std + data = dict(obs=self.obs_buf, act=self.act_buf, ret=self.ret_buf, + adv=self.adv_buf, logp=self.logp_buf) + return {k: torch.as_tensor(v, dtype=torch.float32) for k,v in data.items()} + + + +def vpg(env_fn, actor_critic=core.MLPActorCritic, ac_kwargs=dict(), seed=0, + steps_per_epoch=4000, epochs=50, gamma=0.99, pi_lr=3e-4, + vf_lr=1e-3, train_v_iters=80, lam=0.97, max_ep_len=1000, + logger_kwargs=dict(), save_freq=10): + """ + Vanilla Policy Gradient + + (with GAE-Lambda for advantage estimation) + + Args: + env_fn : A function which creates a copy of the environment. + The environment must satisfy the OpenAI Gym API. 
+ + actor_critic: The constructor method for a PyTorch Module with a + ``step`` method, an ``act`` method, a ``pi`` module, and a ``v`` + module. The ``step`` method should accept a batch of observations + and return: + + =========== ================ ====================================== + Symbol Shape Description + =========== ================ ====================================== + ``a`` (batch, act_dim) | Numpy array of actions for each + | observation. + ``v`` (batch,) | Numpy array of value estimates + | for the provided observations. + ``logp_a`` (batch,) | Numpy array of log probs for the + | actions in ``a``. + =========== ================ ====================================== + + The ``act`` method behaves the same as ``step`` but only returns ``a``. + + The ``pi`` module's forward call should accept a batch of + observations and optionally a batch of actions, and return: + + =========== ================ ====================================== + Symbol Shape Description + =========== ================ ====================================== + ``pi`` N/A | Torch Distribution object, containing + | a batch of distributions describing + | the policy for the provided observations. + ``logp_a`` (batch,) | Optional (only returned if batch of + | actions is given). Tensor containing + | the log probability, according to + | the policy, of the provided actions. + | If actions not given, will contain + | ``None``. + =========== ================ ====================================== + + The ``v`` module's forward call should accept a batch of observations + and return: + + =========== ================ ====================================== + Symbol Shape Description + =========== ================ ====================================== + ``v`` (batch,) | Tensor containing the value estimates + | for the provided observations. (Critical: + | make sure to flatten this!) + =========== ================ ====================================== + + ac_kwargs (dict): Any kwargs appropriate for the ActorCritic object + you provided to VPG. + + seed (int): Seed for random number generators. + + steps_per_epoch (int): Number of steps of interaction (state-action pairs) + for the agent and the environment in each epoch. + + epochs (int): Number of epochs of interaction (equivalent to + number of policy updates) to perform. + + gamma (float): Discount factor. (Always between 0 and 1.) + + pi_lr (float): Learning rate for policy optimizer. + + vf_lr (float): Learning rate for value function optimizer. + + train_v_iters (int): Number of gradient descent steps to take on + value function per epoch. + + lam (float): Lambda for GAE-Lambda. (Always between 0 and 1, + close to 1.) + + max_ep_len (int): Maximum length of trajectory / episode / rollout. + + logger_kwargs (dict): Keyword args for EpochLogger. + + save_freq (int): How often (in terms of gap between epochs) to save + the current policy and value function. + + """ + + # Special function to avoid certain slowdowns from PyTorch + MPI combo. 
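+    # (Presumably the issue is CPU thread oversubscription: several MPI
+    # processes each opening a full-size torch thread pool on one machine.)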
+ setup_pytorch_for_mpi() + + # Set up logger and save configuration + logger = EpochLogger(**logger_kwargs) + logger.save_config(locals()) + + # Random seed + seed += 10000 * proc_id() + torch.manual_seed(seed) + np.random.seed(seed) + + # Instantiate environment + env = env_fn() + obs_dim = env.observation_space.shape + act_dim = env.action_space.shape + + # Create actor-critic module + ac = actor_critic(env.observation_space, env.action_space, **ac_kwargs) + + # Sync params across processes + sync_params(ac) + + # Count variables + var_counts = tuple(core.count_vars(module) for module in [ac.pi, ac.v]) + logger.log('\nNumber of parameters: \t pi: %d, \t v: %d\n'%var_counts) + + # Set up experience buffer + local_steps_per_epoch = int(steps_per_epoch / num_procs()) + buf = VPGBuffer(obs_dim, act_dim, local_steps_per_epoch, gamma, lam) + + # Set up function for computing VPG policy loss + def compute_loss_pi(data): + obs, act, adv, logp_old = data['obs'], data['act'], data['adv'], data['logp'] + + # Policy loss + pi, logp = ac.pi(obs, act) + loss_pi = -(logp * adv).mean() + + # Useful extra info + approx_kl = (logp_old - logp).mean().item() + ent = pi.entropy().mean().item() + pi_info = dict(kl=approx_kl, ent=ent) + + return loss_pi, pi_info + + # Set up function for computing value loss + def compute_loss_v(data): + obs, ret = data['obs'], data['ret'] + return ((ac.v(obs) - ret)**2).mean() + + # Set up optimizers for policy and value function + pi_optimizer = Adam(ac.pi.parameters(), lr=pi_lr) + vf_optimizer = Adam(ac.v.parameters(), lr=vf_lr) + + # Set up model saving + logger.setup_pytorch_saver(ac) + + def update(): + data = buf.get() + + # Get loss and info values before update + pi_l_old, pi_info_old = compute_loss_pi(data) + pi_l_old = pi_l_old.item() + v_l_old = compute_loss_v(data).item() + + # Train policy with a single step of gradient descent + pi_optimizer.zero_grad() + loss_pi, pi_info = compute_loss_pi(data) + loss_pi.backward() + mpi_avg_grads(ac.pi) # average grads across MPI processes + pi_optimizer.step() + + # Value function learning + for i in range(train_v_iters): + vf_optimizer.zero_grad() + loss_v = compute_loss_v(data) + loss_v.backward() + mpi_avg_grads(ac.v) # average grads across MPI processes + vf_optimizer.step() + + # Log changes from update + kl, ent = pi_info['kl'], pi_info_old['ent'] + logger.store(LossPi=pi_l_old, LossV=v_l_old, + KL=kl, Entropy=ent, + DeltaLossPi=(loss_pi.item() - pi_l_old), + DeltaLossV=(loss_v.item() - v_l_old)) + + # Prepare for interaction with environment + start_time = time.time() + o, ep_ret, ep_len = env.reset(), 0, 0 + + # Main loop: collect experience in env and update/log each epoch + for epoch in range(epochs): + for t in range(local_steps_per_epoch): + a, v, logp = ac.step(torch.as_tensor(o, dtype=torch.float32)) + + next_o, r, d, _ = env.step(a) + ep_ret += r + ep_len += 1 + + # save and log + buf.store(o, a, r, v, logp) + logger.store(VVals=v) + + # Update obs (critical!) 
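+            # (Without it, the agent keeps acting from a stale observation and
+            # the stored (obs, act, rew, val, logp) tuples no longer describe
+            # real transitions.)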
+ o = next_o + + timeout = ep_len == max_ep_len + terminal = d or timeout + epoch_ended = t==local_steps_per_epoch-1 + + if terminal or epoch_ended: + if epoch_ended and not(terminal): + print('Warning: trajectory cut off by epoch at %d steps.'%ep_len, flush=True) + # if trajectory didn't reach terminal state, bootstrap value target + if timeout or epoch_ended: + _, v, _ = ac.step(torch.as_tensor(o, dtype=torch.float32)) + else: + v = 0 + buf.finish_path(v) + if terminal: + # only save EpRet / EpLen if trajectory finished + logger.store(EpRet=ep_ret, EpLen=ep_len) + o, ep_ret, ep_len = env.reset(), 0, 0 + + + # Save model + if (epoch % save_freq == 0) or (epoch == epochs-1): + logger.save_state({'env': env}, None) + + # Perform VPG update! + update() + + # Log info about epoch + logger.log_tabular('Epoch', epoch) + logger.log_tabular('EpRet', with_min_and_max=True) + logger.log_tabular('EpLen', average_only=True) + logger.log_tabular('VVals', with_min_and_max=True) + logger.log_tabular('TotalEnvInteracts', (epoch+1)*steps_per_epoch) + logger.log_tabular('LossPi', average_only=True) + logger.log_tabular('LossV', average_only=True) + logger.log_tabular('DeltaLossPi', average_only=True) + logger.log_tabular('DeltaLossV', average_only=True) + logger.log_tabular('Entropy', average_only=True) + logger.log_tabular('KL', average_only=True) + logger.log_tabular('Time', time.time()-start_time) + logger.dump_tabular() + +if __name__ == '__main__': + import argparse + parser = argparse.ArgumentParser() + parser.add_argument('--env', type=str, default='HalfCheetah-v2') + parser.add_argument('--hid', type=int, default=64) + parser.add_argument('--l', type=int, default=2) + parser.add_argument('--gamma', type=float, default=0.99) + parser.add_argument('--seed', '-s', type=int, default=0) + parser.add_argument('--cpu', type=int, default=4) + parser.add_argument('--steps', type=int, default=4000) + parser.add_argument('--epochs', type=int, default=50) + parser.add_argument('--exp_name', type=str, default='vpg') + args = parser.parse_args() + + mpi_fork(args.cpu) # run parallel code with mpi + + from spinup.utils.run_utils import setup_logger_kwargs + logger_kwargs = setup_logger_kwargs(args.exp_name, args.seed) + + vpg(lambda : gym.make(args.env), actor_critic=core.MLPActorCritic, + ac_kwargs=dict(hidden_sizes=[args.hid]*args.l), gamma=args.gamma, + seed=args.seed, steps_per_epoch=args.steps, epochs=args.epochs, + logger_kwargs=logger_kwargs) \ No newline at end of file diff --git a/spinup/algos/ddpg/__init__.py b/spinup/algos/tf1/ddpg/__init__.py similarity index 100% rename from spinup/algos/ddpg/__init__.py rename to spinup/algos/tf1/ddpg/__init__.py diff --git a/spinup/algos/ddpg/core.py b/spinup/algos/tf1/ddpg/core.py similarity index 93% rename from spinup/algos/ddpg/core.py rename to spinup/algos/tf1/ddpg/core.py index 95c62c7f4..84ec1e93c 100644 --- a/spinup/algos/ddpg/core.py +++ b/spinup/algos/tf1/ddpg/core.py @@ -23,7 +23,7 @@ def count_vars(scope): """ Actor-Critics """ -def mlp_actor_critic(x, a, hidden_sizes=(400,300), activation=tf.nn.relu, +def mlp_actor_critic(x, a, hidden_sizes=(256,256), activation=tf.nn.relu, output_activation=tf.tanh, action_space=None): act_dim = a.shape.as_list()[-1] act_limit = action_space.high[0] @@ -33,4 +33,4 @@ def mlp_actor_critic(x, a, hidden_sizes=(400,300), activation=tf.nn.relu, q = tf.squeeze(mlp(tf.concat([x,a], axis=-1), list(hidden_sizes)+[1], activation, None), axis=1) with tf.variable_scope('q', reuse=True): q_pi = tf.squeeze(mlp(tf.concat([x,pi], 
axis=-1), list(hidden_sizes)+[1], activation, None), axis=1) - return pi, q, q_pi \ No newline at end of file + return pi, q, q_pi diff --git a/spinup/algos/ddpg/ddpg.py b/spinup/algos/tf1/ddpg/ddpg.py similarity index 85% rename from spinup/algos/ddpg/ddpg.py rename to spinup/algos/tf1/ddpg/ddpg.py index 44ba9ff60..90dabaa2d 100644 --- a/spinup/algos/ddpg/ddpg.py +++ b/spinup/algos/tf1/ddpg/ddpg.py @@ -2,8 +2,8 @@ import tensorflow as tf import gym import time -from spinup.algos.ddpg import core -from spinup.algos.ddpg.core import get_vars +from spinup.algos.tf1.ddpg import core +from spinup.algos.tf1.ddpg.core import get_vars from spinup.utils.logx import EpochLogger @@ -37,16 +37,16 @@ def sample_batch(self, batch_size=32): rews=self.rews_buf[idxs], done=self.done_buf[idxs]) -""" -Deep Deterministic Policy Gradient (DDPG) -""" def ddpg(env_fn, actor_critic=core.mlp_actor_critic, ac_kwargs=dict(), seed=0, - steps_per_epoch=5000, epochs=100, replay_size=int(1e6), gamma=0.99, + steps_per_epoch=4000, epochs=100, replay_size=int(1e6), gamma=0.99, polyak=0.995, pi_lr=1e-3, q_lr=1e-3, batch_size=100, start_steps=10000, - act_noise=0.1, max_ep_len=1000, logger_kwargs=dict(), save_freq=1): + update_after=1000, update_every=50, act_noise=0.1, num_test_episodes=10, + max_ep_len=1000, logger_kwargs=dict(), save_freq=1): """ + Deep Deterministic Policy Gradient (DDPG) + Args: env_fn : A function which creates a copy of the environment. @@ -102,9 +102,21 @@ def ddpg(env_fn, actor_critic=core.mlp_actor_critic, ac_kwargs=dict(), seed=0, start_steps (int): Number of steps for uniform-random action selection, before running real policy. Helps exploration. + update_after (int): Number of env interactions to collect before + starting to do gradient descent updates. Ensures replay buffer + is full enough for useful updates. + + update_every (int): Number of env interactions that should elapse + between gradient descent updates. Note: Regardless of how long + you wait between updates, the ratio of env steps to gradient steps + is locked to 1. + act_noise (float): Stddev for Gaussian exploration noise added to policy at training time. (At test time, no noise is added.) + num_test_episodes (int): Number of episodes to test the deterministic + policy at the end of each epoch. + max_ep_len (int): Maximum length of trajectory / episode / rollout. logger_kwargs (dict): Keyword args for EpochLogger. @@ -183,9 +195,9 @@ def get_action(o, noise_scale): a += noise_scale * np.random.randn(act_dim) return np.clip(a, -act_limit, act_limit) - def test_agent(n=10): - for j in range(n): - o, r, d, ep_ret, ep_len = test_env.reset(), 0, False, 0, 0 + def test_agent(): + for j in range(num_test_episodes): + o, d, ep_ret, ep_len = test_env.reset(), False, 0, 0 while not(d or (ep_len == max_ep_len)): # Take deterministic actions at test time (noise_scale=0) o, r, d, _ = test_env.step(get_action(o, 0)) @@ -193,18 +205,17 @@ def test_agent(n=10): ep_len += 1 logger.store(TestEpRet=ep_ret, TestEpLen=ep_len) - start_time = time.time() - o, r, d, ep_ret, ep_len = env.reset(), 0, False, 0, 0 + # Prepare for interaction with environment total_steps = steps_per_epoch * epochs + start_time = time.time() + o, ep_ret, ep_len = env.reset(), 0, 0 # Main loop: collect experience in env and update/log each epoch for t in range(total_steps): - """ - Until start_steps have elapsed, randomly sample actions - from a uniform distribution for better exploration. Afterwards, - use the learned policy (with some noise, via act_noise). 
- """ + # Until start_steps have elapsed, randomly sample actions + # from a uniform distribution for better exploration. Afterwards, + # use the learned policy (with some noise, via act_noise). if t > start_steps: a = get_action(o, act_noise) else: @@ -227,12 +238,14 @@ def test_agent(n=10): # most recent observation! o = o2 + # End of trajectory handling if d or (ep_len == max_ep_len): - """ - Perform all DDPG updates at the end of the trajectory, - in accordance with tuning done by TD3 paper authors. - """ - for _ in range(ep_len): + logger.store(EpRet=ep_ret, EpLen=ep_len) + o, ep_ret, ep_len = env.reset(), 0, 0 + + # Update handling + if t >= update_after and t % update_every == 0: + for _ in range(update_every): batch = replay_buffer.sample_batch(batch_size) feed_dict = {x_ph: batch['obs1'], x2_ph: batch['obs2'], @@ -249,15 +262,12 @@ def test_agent(n=10): outs = sess.run([pi_loss, train_pi_op, target_update], feed_dict) logger.store(LossPi=outs[0]) - logger.store(EpRet=ep_ret, EpLen=ep_len) - o, r, d, ep_ret, ep_len = env.reset(), 0, False, 0, 0 - # End of epoch wrap-up - if t > 0 and t % steps_per_epoch == 0: - epoch = t // steps_per_epoch + if (t+1) % steps_per_epoch == 0: + epoch = (t+1) // steps_per_epoch # Save model - if (epoch % save_freq == 0) or (epoch == epochs-1): + if (epoch % save_freq == 0) or (epoch == epochs): logger.save_state({'env': env}, None) # Test the performance of the deterministic version of the agent. @@ -280,8 +290,8 @@ def test_agent(n=10): import argparse parser = argparse.ArgumentParser() parser.add_argument('--env', type=str, default='HalfCheetah-v2') - parser.add_argument('--hid', type=int, default=300) - parser.add_argument('--l', type=int, default=1) + parser.add_argument('--hid', type=int, default=256) + parser.add_argument('--l', type=int, default=2) parser.add_argument('--gamma', type=float, default=0.99) parser.add_argument('--seed', '-s', type=int, default=0) parser.add_argument('--epochs', type=int, default=50) @@ -294,4 +304,4 @@ def test_agent(n=10): ddpg(lambda : gym.make(args.env), actor_critic=core.mlp_actor_critic, ac_kwargs=dict(hidden_sizes=[args.hid]*args.l), gamma=args.gamma, seed=args.seed, epochs=args.epochs, - logger_kwargs=logger_kwargs) \ No newline at end of file + logger_kwargs=logger_kwargs) diff --git a/spinup/algos/ppo/__init__.py b/spinup/algos/tf1/ppo/__init__.py similarity index 100% rename from spinup/algos/ppo/__init__.py rename to spinup/algos/tf1/ppo/__init__.py diff --git a/spinup/algos/ppo/core.py b/spinup/algos/tf1/ppo/core.py similarity index 99% rename from spinup/algos/ppo/core.py rename to spinup/algos/tf1/ppo/core.py index 03dc0ccd5..4cb360f46 100644 --- a/spinup/algos/ppo/core.py +++ b/spinup/algos/tf1/ppo/core.py @@ -101,4 +101,4 @@ def mlp_actor_critic(x, a, hidden_sizes=(64,64), activation=tf.tanh, pi, logp, logp_pi = policy(x, a, hidden_sizes, activation, output_activation, action_space) with tf.variable_scope('v'): v = tf.squeeze(mlp(x, list(hidden_sizes)+[1], activation, None), axis=1) - return pi, logp, logp_pi, v \ No newline at end of file + return pi, logp, logp_pi, v diff --git a/spinup/algos/ppo/ppo.py b/spinup/algos/tf1/ppo/ppo.py similarity index 96% rename from spinup/algos/ppo/ppo.py rename to spinup/algos/tf1/ppo/ppo.py index ef3f22d36..70ec8859b 100644 --- a/spinup/algos/ppo/ppo.py +++ b/spinup/algos/tf1/ppo/ppo.py @@ -2,7 +2,7 @@ import tensorflow as tf import gym import time -import spinup.algos.ppo.core as core +import spinup.algos.tf1.ppo.core as core from spinup.utils.logx import 
EpochLogger from spinup.utils.mpi_tf import MpiAdamOptimizer, sync_all_params from spinup.utils.mpi_tools import mpi_fork, mpi_avg, proc_id, mpi_statistics_scalar, num_procs @@ -82,18 +82,15 @@ def get(self): self.ret_buf, self.logp_buf] -""" -Proximal Policy Optimization (by clipping), - -with early stopping based on approximate KL - -""" def ppo(env_fn, actor_critic=core.mlp_actor_critic, ac_kwargs=dict(), seed=0, steps_per_epoch=4000, epochs=50, gamma=0.99, clip_ratio=0.2, pi_lr=3e-4, vf_lr=1e-3, train_pi_iters=80, train_v_iters=80, lam=0.97, max_ep_len=1000, target_kl=0.01, logger_kwargs=dict(), save_freq=10): """ + Proximal Policy Optimization (by clipping), + + with early stopping based on approximate KL Args: env_fn : A function which creates a copy of the environment. @@ -136,7 +133,8 @@ def ppo(env_fn, actor_critic=core.mlp_actor_critic, ac_kwargs=dict(), seed=0, Roughly: how far can the new policy go from the old policy while still profiting (improving the objective function)? The new policy can still go farther than the clip_ratio says, but it doesn't help - on the objective anymore. (Usually small, 0.1 to 0.3.) + on the objective anymore. (Usually small, 0.1 to 0.3.) Typically + denoted by :math:`\epsilon`. pi_lr (float): Learning rate for policy optimizer. @@ -255,25 +253,28 @@ def update(): for t in range(local_steps_per_epoch): a, v_t, logp_t = sess.run(get_action_ops, feed_dict={x_ph: o.reshape(1,-1)}) + o2, r, d, _ = env.step(a[0]) + ep_ret += r + ep_len += 1 + # save and log buf.store(o, a, r, v_t, logp_t) logger.store(VVals=v_t) - o, r, d, _ = env.step(a[0]) - ep_ret += r - ep_len += 1 + # Update obs (critical!) + o = o2 terminal = d or (ep_len == max_ep_len) if terminal or (t==local_steps_per_epoch-1): if not(terminal): print('Warning: trajectory cut off by epoch at %d steps.'%ep_len) # if trajectory didn't reach terminal state, bootstrap value target - last_val = r if d else sess.run(v, feed_dict={x_ph: o.reshape(1,-1)}) + last_val = 0 if d else sess.run(v, feed_dict={x_ph: o.reshape(1,-1)}) buf.finish_path(last_val) if terminal: # only save EpRet / EpLen if trajectory finished logger.store(EpRet=ep_ret, EpLen=ep_len) - o, r, d, ep_ret, ep_len = env.reset(), 0, False, 0, 0 + o, ep_ret, ep_len = env.reset(), 0, 0 # Save model if (epoch % save_freq == 0) or (epoch == epochs-1): @@ -321,4 +322,4 @@ def update(): ppo(lambda : gym.make(args.env), actor_critic=core.mlp_actor_critic, ac_kwargs=dict(hidden_sizes=[args.hid]*args.l), gamma=args.gamma, seed=args.seed, steps_per_epoch=args.steps, epochs=args.epochs, - logger_kwargs=logger_kwargs) \ No newline at end of file + logger_kwargs=logger_kwargs) diff --git a/spinup/algos/sac/__init__.py b/spinup/algos/tf1/sac/__init__.py similarity index 100% rename from spinup/algos/sac/__init__.py rename to spinup/algos/tf1/sac/__init__.py diff --git a/spinup/algos/sac/core.py b/spinup/algos/tf1/sac/core.py similarity index 51% rename from spinup/algos/sac/core.py rename to spinup/algos/tf1/sac/core.py index 444bd21c1..f23140314 100644 --- a/spinup/algos/sac/core.py +++ b/spinup/algos/tf1/sac/core.py @@ -25,11 +25,6 @@ def gaussian_likelihood(x, mu, log_std): pre_sum = -0.5 * (((x-mu)/(tf.exp(log_std)+EPS))**2 + 2*log_std + np.log(2*np.pi)) return tf.reduce_sum(pre_sum, axis=1) -def clip_but_pass_gradient(x, l=-1., u=1.): - clip_up = tf.cast(x > u, tf.float32) - clip_low = tf.cast(x < l, tf.float32) - return x + tf.stop_gradient((u - x)*clip_up + (l - x)*clip_low) - """ Policies @@ -42,28 +37,8 @@ def mlp_gaussian_policy(x, a, 
hidden_sizes, activation, output_activation): act_dim = a.shape.as_list()[-1] net = mlp(x, list(hidden_sizes), activation, activation) mu = tf.layers.dense(net, act_dim, activation=output_activation) - - """ - Because algorithm maximizes trade-off of reward and entropy, - entropy must be unique to state---and therefore log_stds need - to be a neural network output instead of a shared-across-states - learnable parameter vector. But for deep Relu and other nets, - simply sticking an activationless dense layer at the end would - be quite bad---at the beginning of training, a randomly initialized - net could produce extremely large values for the log_stds, which - would result in some actions being either entirely deterministic - or too random to come back to earth. Either of these introduces - numerical instability which could break the algorithm. To - protect against that, we'll constrain the output range of the - log_stds, to lie within [LOG_STD_MIN, LOG_STD_MAX]. This is - slightly different from the trick used by the original authors of - SAC---they used tf.clip_by_value instead of squashing and rescaling. - I prefer this approach because it allows gradient propagation - through log_std where clipping wouldn't, but I don't know if - it makes much of a difference. - """ - log_std = tf.layers.dense(net, act_dim, activation=tf.tanh) - log_std = LOG_STD_MIN + 0.5 * (LOG_STD_MAX - LOG_STD_MIN) * (log_std + 1) + log_std = tf.layers.dense(net, act_dim, activation=None) + log_std = tf.clip_by_value(log_std, LOG_STD_MIN, LOG_STD_MAX) std = tf.exp(log_std) pi = mu + tf.random_normal(tf.shape(mu)) * std @@ -71,17 +46,22 @@ def mlp_gaussian_policy(x, a, hidden_sizes, activation, output_activation): return mu, pi, logp_pi def apply_squashing_func(mu, pi, logp_pi): + # Adjustment to log prob + # NOTE: This formula is a little bit magic. To get an understanding of where it + # comes from, check out the original SAC paper (arXiv 1801.01290) and look in + # appendix C. This is a more numerically-stable equivalent to Eq 21. + # Try deriving it yourself as a (very difficult) exercise. :) + logp_pi -= tf.reduce_sum(2*(np.log(2) - pi - tf.nn.softplus(-2*pi)), axis=1) + + # Squash those unbounded actions! mu = tf.tanh(mu) pi = tf.tanh(pi) - # To avoid evil machine precision error, strictly clip 1-pi**2 to [0,1] range. 
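+    # (One way to see where the adjustment above comes from: writing u for the
+    # pre-tanh sample, the change-of-variables term satisfies
+    #     log(1 - tanh(u)^2) = log(4 / (e^u + e^{-u})^2)
+    #                        = 2*(log(2) - u - log(1 + e^{-2u}))
+    #                        = 2*(log(2) - u - softplus(-2u)),
+    # which is exactly the per-dimension quantity subtracted from logp_pi.)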
- logp_pi -= tf.reduce_sum(tf.log(clip_but_pass_gradient(1 - pi**2, l=0, u=1) + 1e-6), axis=1) return mu, pi, logp_pi - """ Actor-Critics """ -def mlp_actor_critic(x, a, hidden_sizes=(400,300), activation=tf.nn.relu, +def mlp_actor_critic(x, a, hidden_sizes=(256,256), activation=tf.nn.relu, output_activation=None, policy=mlp_gaussian_policy, action_space=None): # policy with tf.variable_scope('pi'): @@ -97,12 +77,6 @@ def mlp_actor_critic(x, a, hidden_sizes=(400,300), activation=tf.nn.relu, vf_mlp = lambda x : tf.squeeze(mlp(x, list(hidden_sizes)+[1], activation, None), axis=1) with tf.variable_scope('q1'): q1 = vf_mlp(tf.concat([x,a], axis=-1)) - with tf.variable_scope('q1', reuse=True): - q1_pi = vf_mlp(tf.concat([x,pi], axis=-1)) with tf.variable_scope('q2'): q2 = vf_mlp(tf.concat([x,a], axis=-1)) - with tf.variable_scope('q2', reuse=True): - q2_pi = vf_mlp(tf.concat([x,pi], axis=-1)) - with tf.variable_scope('v'): - v = vf_mlp(x) - return mu, pi, logp_pi, q1, q2, q1_pi, q2_pi, v \ No newline at end of file + return mu, pi, logp_pi, q1, q2 diff --git a/spinup/algos/sac/sac.py b/spinup/algos/tf1/sac/sac.py similarity index 78% rename from spinup/algos/sac/sac.py rename to spinup/algos/tf1/sac/sac.py index 0f0eb1d90..e31a09dd2 100644 --- a/spinup/algos/sac/sac.py +++ b/spinup/algos/tf1/sac/sac.py @@ -2,8 +2,8 @@ import tensorflow as tf import gym import time -from spinup.algos.sac import core -from spinup.algos.sac.core import get_vars +from spinup.algos.tf1.sac import core +from spinup.algos.tf1.sac.core import get_vars from spinup.utils.logx import EpochLogger @@ -37,18 +37,16 @@ def sample_batch(self, batch_size=32): rews=self.rews_buf[idxs], done=self.done_buf[idxs]) -""" -Soft Actor-Critic -(With slight variations that bring it closer to TD3) - -""" def sac(env_fn, actor_critic=core.mlp_actor_critic, ac_kwargs=dict(), seed=0, - steps_per_epoch=5000, epochs=100, replay_size=int(1e6), gamma=0.99, + steps_per_epoch=4000, epochs=100, replay_size=int(1e6), gamma=0.99, polyak=0.995, lr=1e-3, alpha=0.2, batch_size=100, start_steps=10000, - max_ep_len=1000, logger_kwargs=dict(), save_freq=1): + update_after=1000, update_every=50, num_test_episodes=10, max_ep_len=1000, + logger_kwargs=dict(), save_freq=1): """ + Soft Actor-Critic (SAC) + Args: env_fn : A function which creates a copy of the environment. @@ -76,14 +74,6 @@ def sac(env_fn, actor_critic=core.mlp_actor_critic, ac_kwargs=dict(), seed=0, ``q2`` (batch,) | Gives another estimate of Q* for | states in ``x_ph`` and actions in | ``a_ph``. - ``q1_pi`` (batch,) | Gives the composition of ``q1`` and - | ``pi`` for states in ``x_ph``: - | q1(x, pi(x)). - ``q2_pi`` (batch,) | Gives the composition of ``q2`` and - | ``pi`` for states in ``x_ph``: - | q2(x, pi(x)). - ``v`` (batch,) | Gives the value estimate for states - | in ``x_ph``. =========== ================ ====================================== ac_kwargs (dict): Any kwargs appropriate for the actor_critic @@ -120,6 +110,18 @@ def sac(env_fn, actor_critic=core.mlp_actor_critic, ac_kwargs=dict(), seed=0, start_steps (int): Number of steps for uniform-random action selection, before running real policy. Helps exploration. + update_after (int): Number of env interactions to collect before + starting to do gradient descent updates. Ensures replay buffer + is full enough for useful updates. + + update_every (int): Number of env interactions that should elapse + between gradient descent updates. 
Note: Regardless of how long + you wait between updates, the ratio of env steps to gradient steps + is locked to 1. + + num_test_episodes (int): Number of episodes to test the deterministic + policy at the end of each epoch. + max_ep_len (int): Maximum length of trajectory / episode / rollout. logger_kwargs (dict): Keyword args for EpochLogger. @@ -150,34 +152,39 @@ def sac(env_fn, actor_critic=core.mlp_actor_critic, ac_kwargs=dict(), seed=0, # Main outputs from computation graph with tf.variable_scope('main'): - mu, pi, logp_pi, q1, q2, q1_pi, q2_pi, v = actor_critic(x_ph, a_ph, **ac_kwargs) + mu, pi, logp_pi, q1, q2 = actor_critic(x_ph, a_ph, **ac_kwargs) + + with tf.variable_scope('main', reuse=True): + # compose q with pi, for pi-learning + _, _, _, q1_pi, q2_pi = actor_critic(x_ph, pi, **ac_kwargs) + + # get actions and log probs of actions for next states, for Q-learning + _, pi_next, logp_pi_next, _, _ = actor_critic(x2_ph, a_ph, **ac_kwargs) # Target value network with tf.variable_scope('target'): - _, _, _, _, _, _, _, v_targ = actor_critic(x2_ph, a_ph, **ac_kwargs) + # target q values, using actions from *current* policy + _, _, _, q1_targ, q2_targ = actor_critic(x2_ph, pi_next, **ac_kwargs) # Experience buffer replay_buffer = ReplayBuffer(obs_dim=obs_dim, act_dim=act_dim, size=replay_size) # Count variables - var_counts = tuple(core.count_vars(scope) for scope in - ['main/pi', 'main/q1', 'main/q2', 'main/v', 'main']) - print(('\nNumber of parameters: \t pi: %d, \t' + \ - 'q1: %d, \t q2: %d, \t v: %d, \t total: %d\n')%var_counts) + var_counts = tuple(core.count_vars(scope) for scope in ['main/pi', 'main/q1', 'main/q2', 'main']) + print('\nNumber of parameters: \t pi: %d, \t q1: %d, \t q2: %d, \t total: %d\n'%var_counts) # Min Double-Q: min_q_pi = tf.minimum(q1_pi, q2_pi) + min_q_targ = tf.minimum(q1_targ, q2_targ) - # Targets for Q and V regression - q_backup = tf.stop_gradient(r_ph + gamma*(1-d_ph)*v_targ) - v_backup = tf.stop_gradient(min_q_pi - alpha * logp_pi) + # Entropy-regularized Bellman backup for Q functions, using Clipped Double-Q targets + q_backup = tf.stop_gradient(r_ph + gamma*(1-d_ph)*(min_q_targ - alpha * logp_pi_next)) # Soft actor-critic losses - pi_loss = tf.reduce_mean(alpha * logp_pi - q1_pi) + pi_loss = tf.reduce_mean(alpha * logp_pi - min_q_pi) q1_loss = 0.5 * tf.reduce_mean((q_backup - q1)**2) q2_loss = 0.5 * tf.reduce_mean((q_backup - q2)**2) - v_loss = 0.5 * tf.reduce_mean((v_backup - v)**2) - value_loss = q1_loss + q2_loss + v_loss + value_loss = q1_loss + q2_loss # Policy train op # (has to be separate from value train op, because q1_pi appears in pi_loss) @@ -187,7 +194,7 @@ def sac(env_fn, actor_critic=core.mlp_actor_critic, ac_kwargs=dict(), seed=0, # Value train op # (control dep of train_pi_op because sess.run otherwise evaluates in nondeterministic order) value_optimizer = tf.train.AdamOptimizer(learning_rate=lr) - value_params = get_vars('main/q') + get_vars('main/v') + value_params = get_vars('main/q') with tf.control_dependencies([train_pi_op]): train_value_op = value_optimizer.minimize(value_loss, var_list=value_params) @@ -198,7 +205,7 @@ def sac(env_fn, actor_critic=core.mlp_actor_critic, ac_kwargs=dict(), seed=0, for v_main, v_targ in zip(get_vars('main'), get_vars('target'))]) # All ops to call during one training step - step_ops = [pi_loss, q1_loss, q2_loss, v_loss, q1, q2, v, logp_pi, + step_ops = [pi_loss, q1_loss, q2_loss, q1, q2, logp_pi, train_pi_op, train_value_op, target_update] # Initializing targets to match main variables @@ 
-211,16 +218,15 @@ def sac(env_fn, actor_critic=core.mlp_actor_critic, ac_kwargs=dict(), seed=0, # Setup model saving logger.setup_tf_saver(sess, inputs={'x': x_ph, 'a': a_ph}, - outputs={'mu': mu, 'pi': pi, 'q1': q1, 'q2': q2, 'v': v}) + outputs={'mu': mu, 'pi': pi, 'q1': q1, 'q2': q2}) def get_action(o, deterministic=False): act_op = mu if deterministic else pi return sess.run(act_op, feed_dict={x_ph: o.reshape(1,-1)})[0] - def test_agent(n=10): - global sess, mu, pi, q1, q2, q1_pi, q2_pi - for j in range(n): - o, r, d, ep_ret, ep_len = test_env.reset(), 0, False, 0, 0 + def test_agent(): + for j in range(num_test_episodes): + o, d, ep_ret, ep_len = test_env.reset(), False, 0, 0 while not(d or (ep_len == max_ep_len)): # Take deterministic actions at test time o, r, d, _ = test_env.step(get_action(o, True)) @@ -229,17 +235,15 @@ def test_agent(n=10): logger.store(TestEpRet=ep_ret, TestEpLen=ep_len) start_time = time.time() - o, r, d, ep_ret, ep_len = env.reset(), 0, False, 0, 0 + o, ep_ret, ep_len = env.reset(), 0, 0 total_steps = steps_per_epoch * epochs # Main loop: collect experience in env and update/log each epoch for t in range(total_steps): - """ - Until start_steps have elapsed, randomly sample actions - from a uniform distribution for better exploration. Afterwards, - use the learned policy. - """ + # Until start_steps have elapsed, randomly sample actions + # from a uniform distribution for better exploration. Afterwards, + # use the learned policy. if t > start_steps: a = get_action(o) else: @@ -262,13 +266,14 @@ def test_agent(n=10): # most recent observation! o = o2 + # End of trajectory handling if d or (ep_len == max_ep_len): - """ - Perform all SAC updates at the end of the trajectory. - This is a slight difference from the SAC specified in the - original paper. - """ - for j in range(ep_len): + logger.store(EpRet=ep_ret, EpLen=ep_len) + o, ep_ret, ep_len = env.reset(), 0, 0 + + # Update handling + if t >= update_after and t % update_every == 0: + for j in range(update_every): batch = replay_buffer.sample_batch(batch_size) feed_dict = {x_ph: batch['obs1'], x2_ph: batch['obs2'], @@ -278,19 +283,14 @@ def test_agent(n=10): } outs = sess.run(step_ops, feed_dict) logger.store(LossPi=outs[0], LossQ1=outs[1], LossQ2=outs[2], - LossV=outs[3], Q1Vals=outs[4], Q2Vals=outs[5], - VVals=outs[6], LogPi=outs[7]) - - logger.store(EpRet=ep_ret, EpLen=ep_len) - o, r, d, ep_ret, ep_len = env.reset(), 0, False, 0, 0 - + Q1Vals=outs[3], Q2Vals=outs[4], LogPi=outs[5]) # End of epoch wrap-up - if t > 0 and t % steps_per_epoch == 0: - epoch = t // steps_per_epoch + if (t+1) % steps_per_epoch == 0: + epoch = (t+1) // steps_per_epoch # Save model - if (epoch % save_freq == 0) or (epoch == epochs-1): + if (epoch % save_freq == 0) or (epoch == epochs): logger.save_state({'env': env}, None) # Test the performance of the deterministic version of the agent. 
@@ -305,12 +305,10 @@ def test_agent(n=10): logger.log_tabular('TotalEnvInteracts', t) logger.log_tabular('Q1Vals', with_min_and_max=True) logger.log_tabular('Q2Vals', with_min_and_max=True) - logger.log_tabular('VVals', with_min_and_max=True) logger.log_tabular('LogPi', with_min_and_max=True) logger.log_tabular('LossPi', average_only=True) logger.log_tabular('LossQ1', average_only=True) logger.log_tabular('LossQ2', average_only=True) - logger.log_tabular('LossV', average_only=True) logger.log_tabular('Time', time.time()-start_time) logger.dump_tabular() @@ -318,8 +316,8 @@ def test_agent(n=10): import argparse parser = argparse.ArgumentParser() parser.add_argument('--env', type=str, default='HalfCheetah-v2') - parser.add_argument('--hid', type=int, default=300) - parser.add_argument('--l', type=int, default=1) + parser.add_argument('--hid', type=int, default=256) + parser.add_argument('--l', type=int, default=2) parser.add_argument('--gamma', type=float, default=0.99) parser.add_argument('--seed', '-s', type=int, default=0) parser.add_argument('--epochs', type=int, default=50) @@ -332,4 +330,4 @@ def test_agent(n=10): sac(lambda : gym.make(args.env), actor_critic=core.mlp_actor_critic, ac_kwargs=dict(hidden_sizes=[args.hid]*args.l), gamma=args.gamma, seed=args.seed, epochs=args.epochs, - logger_kwargs=logger_kwargs) \ No newline at end of file + logger_kwargs=logger_kwargs) diff --git a/spinup/algos/td3/__init__.py b/spinup/algos/tf1/td3/__init__.py similarity index 100% rename from spinup/algos/td3/__init__.py rename to spinup/algos/tf1/td3/__init__.py diff --git a/spinup/algos/td3/core.py b/spinup/algos/tf1/td3/core.py similarity index 93% rename from spinup/algos/td3/core.py rename to spinup/algos/tf1/td3/core.py index ad444600c..f746ae334 100644 --- a/spinup/algos/td3/core.py +++ b/spinup/algos/tf1/td3/core.py @@ -23,7 +23,7 @@ def count_vars(scope): """ Actor-Critics """ -def mlp_actor_critic(x, a, hidden_sizes=(400,300), activation=tf.nn.relu, +def mlp_actor_critic(x, a, hidden_sizes=(256,256), activation=tf.nn.relu, output_activation=tf.tanh, action_space=None): act_dim = a.shape.as_list()[-1] act_limit = action_space.high[0] @@ -35,4 +35,4 @@ def mlp_actor_critic(x, a, hidden_sizes=(400,300), activation=tf.nn.relu, q2 = tf.squeeze(mlp(tf.concat([x,a], axis=-1), list(hidden_sizes)+[1], activation, None), axis=1) with tf.variable_scope('q1', reuse=True): q1_pi = tf.squeeze(mlp(tf.concat([x,pi], axis=-1), list(hidden_sizes)+[1], activation, None), axis=1) - return pi, q1, q2, q1_pi \ No newline at end of file + return pi, q1, q2, q1_pi diff --git a/spinup/algos/td3/td3.py b/spinup/algos/tf1/td3/td3.py similarity index 86% rename from spinup/algos/td3/td3.py rename to spinup/algos/tf1/td3/td3.py index 3c375ca7c..32257e990 100644 --- a/spinup/algos/td3/td3.py +++ b/spinup/algos/tf1/td3/td3.py @@ -2,8 +2,8 @@ import tensorflow as tf import gym import time -from spinup.algos.td3 import core -from spinup.algos.td3.core import get_vars +from spinup.algos.tf1.td3 import core +from spinup.algos.tf1.td3.core import get_vars from spinup.utils.logx import EpochLogger @@ -37,17 +37,17 @@ def sample_batch(self, batch_size=32): rews=self.rews_buf[idxs], done=self.done_buf[idxs]) -""" -TD3 (Twin Delayed DDPG) -""" def td3(env_fn, actor_critic=core.mlp_actor_critic, ac_kwargs=dict(), seed=0, - steps_per_epoch=5000, epochs=100, replay_size=int(1e6), gamma=0.99, + steps_per_epoch=4000, epochs=100, replay_size=int(1e6), gamma=0.99, polyak=0.995, pi_lr=1e-3, q_lr=1e-3, batch_size=100, start_steps=10000, 
- act_noise=0.1, target_noise=0.2, noise_clip=0.5, policy_delay=2, - max_ep_len=1000, logger_kwargs=dict(), save_freq=1): + update_after=1000, update_every=50, act_noise=0.1, target_noise=0.2, + noise_clip=0.5, policy_delay=2, num_test_episodes=10, max_ep_len=1000, + logger_kwargs=dict(), save_freq=1): """ + Twin Delayed Deep Deterministic Policy Gradient (TD3) + Args: env_fn : A function which creates a copy of the environment. @@ -106,6 +106,15 @@ def td3(env_fn, actor_critic=core.mlp_actor_critic, ac_kwargs=dict(), seed=0, start_steps (int): Number of steps for uniform-random action selection, before running real policy. Helps exploration. + update_after (int): Number of env interactions to collect before + starting to do gradient descent updates. Ensures replay buffer + is full enough for useful updates. + + update_every (int): Number of env interactions that should elapse + between gradient descent updates. Note: Regardless of how long + you wait between updates, the ratio of env steps to gradient steps + is locked to 1. + act_noise (float): Stddev for Gaussian exploration noise added to policy at training time. (At test time, no noise is added.) @@ -118,6 +127,9 @@ def td3(env_fn, actor_critic=core.mlp_actor_critic, ac_kwargs=dict(), seed=0, policy_delay (int): Policy will only be updated once every policy_delay times for each update of the Q-networks. + num_test_episodes (int): Number of episodes to test the deterministic + policy at the end of each epoch. + max_ep_len (int): Maximum length of trajectory / episode / rollout. logger_kwargs (dict): Keyword args for EpochLogger. @@ -209,9 +221,9 @@ def get_action(o, noise_scale): a += noise_scale * np.random.randn(act_dim) return np.clip(a, -act_limit, act_limit) - def test_agent(n=10): - for j in range(n): - o, r, d, ep_ret, ep_len = test_env.reset(), 0, False, 0, 0 + def test_agent(): + for j in range(num_test_episodes): + o, d, ep_ret, ep_len = test_env.reset(), False, 0, 0 while not(d or (ep_len == max_ep_len)): # Take deterministic actions at test time (noise_scale=0) o, r, d, _ = test_env.step(get_action(o, 0)) @@ -220,17 +232,15 @@ def test_agent(n=10): logger.store(TestEpRet=ep_ret, TestEpLen=ep_len) start_time = time.time() - o, r, d, ep_ret, ep_len = env.reset(), 0, False, 0, 0 + o, ep_ret, ep_len = env.reset(), 0, 0 total_steps = steps_per_epoch * epochs # Main loop: collect experience in env and update/log each epoch for t in range(total_steps): - """ - Until start_steps have elapsed, randomly sample actions - from a uniform distribution for better exploration. Afterwards, - use the learned policy (with some noise, via act_noise). - """ + # Until start_steps have elapsed, randomly sample actions + # from a uniform distribution for better exploration. Afterwards, + # use the learned policy (with some noise, via act_noise). if t > start_steps: a = get_action(o, act_noise) else: @@ -253,13 +263,14 @@ def test_agent(n=10): # most recent observation! o = o2 + # End of trajectory handling if d or (ep_len == max_ep_len): - """ - Perform all TD3 updates at the end of the trajectory - (in accordance with source code of TD3 published by - original authors). 
- """ - for j in range(ep_len): + logger.store(EpRet=ep_ret, EpLen=ep_len) + o, ep_ret, ep_len = env.reset(), 0, 0 + + # Update handling + if t >= update_after and t % update_every == 0: + for j in range(update_every): batch = replay_buffer.sample_batch(batch_size) feed_dict = {x_ph: batch['obs1'], x2_ph: batch['obs2'], @@ -276,15 +287,12 @@ def test_agent(n=10): outs = sess.run([pi_loss, train_pi_op, target_update], feed_dict) logger.store(LossPi=outs[0]) - logger.store(EpRet=ep_ret, EpLen=ep_len) - o, r, d, ep_ret, ep_len = env.reset(), 0, False, 0, 0 - # End of epoch wrap-up - if t > 0 and t % steps_per_epoch == 0: - epoch = t // steps_per_epoch + if (t+1) % steps_per_epoch == 0: + epoch = (t+1) // steps_per_epoch # Save model - if (epoch % save_freq == 0) or (epoch == epochs-1): + if (epoch % save_freq == 0) or (epoch == epochs): logger.save_state({'env': env}, None) # Test the performance of the deterministic version of the agent. @@ -308,8 +316,8 @@ def test_agent(n=10): import argparse parser = argparse.ArgumentParser() parser.add_argument('--env', type=str, default='HalfCheetah-v2') - parser.add_argument('--hid', type=int, default=300) - parser.add_argument('--l', type=int, default=1) + parser.add_argument('--hid', type=int, default=256) + parser.add_argument('--l', type=int, default=2) parser.add_argument('--gamma', type=float, default=0.99) parser.add_argument('--seed', '-s', type=int, default=0) parser.add_argument('--epochs', type=int, default=50) @@ -322,4 +330,4 @@ def test_agent(n=10): td3(lambda : gym.make(args.env), actor_critic=core.mlp_actor_critic, ac_kwargs=dict(hidden_sizes=[args.hid]*args.l), gamma=args.gamma, seed=args.seed, epochs=args.epochs, - logger_kwargs=logger_kwargs) \ No newline at end of file + logger_kwargs=logger_kwargs) diff --git a/spinup/algos/trpo/__init__.py b/spinup/algos/tf1/trpo/__init__.py similarity index 100% rename from spinup/algos/trpo/__init__.py rename to spinup/algos/tf1/trpo/__init__.py diff --git a/spinup/algos/trpo/core.py b/spinup/algos/tf1/trpo/core.py similarity index 99% rename from spinup/algos/trpo/core.py rename to spinup/algos/tf1/trpo/core.py index ea96b041f..6fc865921 100644 --- a/spinup/algos/trpo/core.py +++ b/spinup/algos/tf1/trpo/core.py @@ -158,4 +158,4 @@ def mlp_actor_critic(x, a, hidden_sizes=(64,64), activation=tf.tanh, pi, logp, logp_pi, info, info_phs, d_kl = policy_outs with tf.variable_scope('v'): v = tf.squeeze(mlp(x, list(hidden_sizes)+[1], activation, None), axis=1) - return pi, logp, logp_pi, info, info_phs, d_kl, v \ No newline at end of file + return pi, logp, logp_pi, info, info_phs, d_kl, v diff --git a/spinup/algos/trpo/trpo.py b/spinup/algos/tf1/trpo/trpo.py similarity index 97% rename from spinup/algos/trpo/trpo.py rename to spinup/algos/tf1/trpo/trpo.py index 80af55078..9e09f1348 100644 --- a/spinup/algos/trpo/trpo.py +++ b/spinup/algos/tf1/trpo/trpo.py @@ -2,7 +2,7 @@ import tensorflow as tf import gym import time -import spinup.algos.trpo.core as core +import spinup.algos.tf1.trpo.core as core from spinup.utils.logx import EpochLogger from spinup.utils.mpi_tf import MpiAdamOptimizer, sync_all_params from spinup.utils.mpi_tools import mpi_fork, mpi_avg, proc_id, mpi_statistics_scalar, num_procs @@ -87,19 +87,17 @@ def get(self): return [self.obs_buf, self.act_buf, self.adv_buf, self.ret_buf, self.logp_buf] + core.values_as_sorted_list(self.info_bufs) -""" -Trust Region Policy Optimization -(with support for Natural Policy Gradient) - -""" def trpo(env_fn, actor_critic=core.mlp_actor_critic, 
ac_kwargs=dict(), seed=0, steps_per_epoch=4000, epochs=50, gamma=0.99, delta=0.01, vf_lr=1e-3, train_v_iters=80, damping_coeff=0.1, cg_iters=10, backtrack_iters=10, backtrack_coeff=0.8, lam=0.97, max_ep_len=1000, logger_kwargs=dict(), save_freq=10, algo='trpo'): """ + Trust Region Policy Optimization + + (with support for Natural Policy Gradient) Args: env_fn : A function which creates a copy of the environment. @@ -326,7 +324,7 @@ def set_and_eval(step): DeltaLossV=(v_l_new - v_l_old)) start_time = time.time() - o, r, d, ep_ret, ep_len = env.reset(), 0, False, 0, 0 + o, ep_ret, ep_len = env.reset(), 0, 0 # Main loop: collect experience in env and update/log each epoch for epoch in range(epochs): @@ -334,25 +332,28 @@ def set_and_eval(step): agent_outs = sess.run(get_action_ops, feed_dict={x_ph: o.reshape(1,-1)}) a, v_t, logp_t, info_t = agent_outs[0][0], agent_outs[1], agent_outs[2], agent_outs[3:] + o2, r, d, _ = env.step(a) + ep_ret += r + ep_len += 1 + # save and log buf.store(o, a, r, v_t, logp_t, info_t) logger.store(VVals=v_t) - o, r, d, _ = env.step(a) - ep_ret += r - ep_len += 1 + # Update obs (critical!) + o = o2 terminal = d or (ep_len == max_ep_len) if terminal or (t==local_steps_per_epoch-1): if not(terminal): print('Warning: trajectory cut off by epoch at %d steps.'%ep_len) # if trajectory didn't reach terminal state, bootstrap value target - last_val = r if d else sess.run(v, feed_dict={x_ph: o.reshape(1,-1)}) + last_val = 0 if d else sess.run(v, feed_dict={x_ph: o.reshape(1,-1)}) buf.finish_path(last_val) if terminal: # only save EpRet / EpLen if trajectory finished logger.store(EpRet=ep_ret, EpLen=ep_len) - o, r, d, ep_ret, ep_len = env.reset(), 0, False, 0, 0 + o, ep_ret, ep_len = env.reset(), 0, 0 # Save model if (epoch % save_freq == 0) or (epoch == epochs-1): @@ -399,4 +400,4 @@ def set_and_eval(step): trpo(lambda : gym.make(args.env), actor_critic=core.mlp_actor_critic, ac_kwargs=dict(hidden_sizes=[args.hid]*args.l), gamma=args.gamma, seed=args.seed, steps_per_epoch=args.steps, epochs=args.epochs, - logger_kwargs=logger_kwargs) \ No newline at end of file + logger_kwargs=logger_kwargs) diff --git a/spinup/algos/vpg/__init__.py b/spinup/algos/tf1/vpg/__init__.py similarity index 100% rename from spinup/algos/vpg/__init__.py rename to spinup/algos/tf1/vpg/__init__.py diff --git a/spinup/algos/vpg/core.py b/spinup/algos/tf1/vpg/core.py similarity index 99% rename from spinup/algos/vpg/core.py rename to spinup/algos/tf1/vpg/core.py index 03dc0ccd5..4cb360f46 100644 --- a/spinup/algos/vpg/core.py +++ b/spinup/algos/tf1/vpg/core.py @@ -101,4 +101,4 @@ def mlp_actor_critic(x, a, hidden_sizes=(64,64), activation=tf.tanh, pi, logp, logp_pi = policy(x, a, hidden_sizes, activation, output_activation, action_space) with tf.variable_scope('v'): v = tf.squeeze(mlp(x, list(hidden_sizes)+[1], activation, None), axis=1) - return pi, logp, logp_pi, v \ No newline at end of file + return pi, logp, logp_pi, v diff --git a/spinup/algos/vpg/vpg.py b/spinup/algos/tf1/vpg/vpg.py similarity index 96% rename from spinup/algos/vpg/vpg.py rename to spinup/algos/tf1/vpg/vpg.py index 6492c8a25..5d4ecd183 100644 --- a/spinup/algos/vpg/vpg.py +++ b/spinup/algos/tf1/vpg/vpg.py @@ -2,7 +2,7 @@ import tensorflow as tf import gym import time -import spinup.algos.vpg.core as core +import spinup.algos.tf1.vpg.core as core from spinup.utils.logx import EpochLogger from spinup.utils.mpi_tf import MpiAdamOptimizer, sync_all_params from spinup.utils.mpi_tools import mpi_fork, mpi_avg, proc_id, 
mpi_statistics_scalar, num_procs @@ -82,18 +82,15 @@ def get(self): self.ret_buf, self.logp_buf] -""" -Vanilla Policy Gradient - -(with GAE-Lambda for advantage estimation) - -""" def vpg(env_fn, actor_critic=core.mlp_actor_critic, ac_kwargs=dict(), seed=0, steps_per_epoch=4000, epochs=50, gamma=0.99, pi_lr=3e-4, vf_lr=1e-3, train_v_iters=80, lam=0.97, max_ep_len=1000, logger_kwargs=dict(), save_freq=10): """ + Vanilla Policy Gradient + + (with GAE-Lambda for advantage estimation) Args: env_fn : A function which creates a copy of the environment. @@ -226,32 +223,35 @@ def update(): DeltaLossV=(v_l_new - v_l_old)) start_time = time.time() - o, r, d, ep_ret, ep_len = env.reset(), 0, False, 0, 0 + o, ep_ret, ep_len = env.reset(), 0, 0 # Main loop: collect experience in env and update/log each epoch for epoch in range(epochs): for t in range(local_steps_per_epoch): a, v_t, logp_t = sess.run(get_action_ops, feed_dict={x_ph: o.reshape(1,-1)}) + o2, r, d, _ = env.step(a[0]) + ep_ret += r + ep_len += 1 + # save and log buf.store(o, a, r, v_t, logp_t) logger.store(VVals=v_t) - o, r, d, _ = env.step(a[0]) - ep_ret += r - ep_len += 1 + # Update obs (critical!) + o = o2 terminal = d or (ep_len == max_ep_len) if terminal or (t==local_steps_per_epoch-1): if not(terminal): print('Warning: trajectory cut off by epoch at %d steps.'%ep_len) # if trajectory didn't reach terminal state, bootstrap value target - last_val = r if d else sess.run(v, feed_dict={x_ph: o.reshape(1,-1)}) + last_val = 0 if d else sess.run(v, feed_dict={x_ph: o.reshape(1,-1)}) buf.finish_path(last_val) if terminal: # only save EpRet / EpLen if trajectory finished logger.store(EpRet=ep_ret, EpLen=ep_len) - o, r, d, ep_ret, ep_len = env.reset(), 0, False, 0, 0 + o, ep_ret, ep_len = env.reset(), 0, 0 # Save model if (epoch % save_freq == 0) or (epoch == epochs-1): @@ -297,4 +297,4 @@ def update(): vpg(lambda : gym.make(args.env), actor_critic=core.mlp_actor_critic, ac_kwargs=dict(hidden_sizes=[args.hid]*args.l), gamma=args.gamma, seed=args.seed, steps_per_epoch=args.steps, epochs=args.epochs, - logger_kwargs=logger_kwargs) \ No newline at end of file + logger_kwargs=logger_kwargs) diff --git a/spinup/examples/pytorch/bench_ppo_cartpole.py b/spinup/examples/pytorch/bench_ppo_cartpole.py new file mode 100644 index 000000000..60e202bab --- /dev/null +++ b/spinup/examples/pytorch/bench_ppo_cartpole.py @@ -0,0 +1,19 @@ +from spinup.utils.run_utils import ExperimentGrid +from spinup import ppo_pytorch +import torch + +if __name__ == '__main__': + import argparse + parser = argparse.ArgumentParser() + parser.add_argument('--cpu', type=int, default=4) + parser.add_argument('--num_runs', type=int, default=3) + args = parser.parse_args() + + eg = ExperimentGrid(name='ppo-pyt-bench') + eg.add('env_name', 'CartPole-v0', '', True) + eg.add('seed', [10*i for i in range(args.num_runs)]) + eg.add('epochs', 10) + eg.add('steps_per_epoch', 4000) + eg.add('ac_kwargs:hidden_sizes', [(32,), (64,64)], 'hid') + eg.add('ac_kwargs:activation', [torch.nn.Tanh, torch.nn.ReLU], '') + eg.run(ppo_pytorch, num_cpu=args.cpu) \ No newline at end of file diff --git a/spinup/examples/pytorch/pg_math/1_simple_pg.py b/spinup/examples/pytorch/pg_math/1_simple_pg.py new file mode 100644 index 000000000..af4bbbd34 --- /dev/null +++ b/spinup/examples/pytorch/pg_math/1_simple_pg.py @@ -0,0 +1,128 @@ +import torch +import torch.nn as nn +from torch.distributions.categorical import Categorical +from torch.optim import Adam +import numpy as np +import gym +from gym.spaces import 
Discrete, Box + +def mlp(sizes, activation=nn.Tanh, output_activation=nn.Identity): + # Build a feedforward neural network. + layers = [] + for j in range(len(sizes)-1): + act = activation if j < len(sizes)-2 else output_activation + layers += [nn.Linear(sizes[j], sizes[j+1]), act()] + return nn.Sequential(*layers) + +def train(env_name='CartPole-v0', hidden_sizes=[32], lr=1e-2, + epochs=50, batch_size=5000, render=False): + + # make environment, check spaces, get obs / act dims + env = gym.make(env_name) + assert isinstance(env.observation_space, Box), \ + "This example only works for envs with continuous state spaces." + assert isinstance(env.action_space, Discrete), \ + "This example only works for envs with discrete action spaces." + + obs_dim = env.observation_space.shape[0] + n_acts = env.action_space.n + + # make core of policy network + logits_net = mlp(sizes=[obs_dim]+hidden_sizes+[n_acts]) + + # make function to compute action distribution + def get_policy(obs): + logits = logits_net(obs) + return Categorical(logits=logits) + + # make action selection function (outputs int actions, sampled from policy) + def get_action(obs): + return get_policy(obs).sample().item() + + # make loss function whose gradient, for the right data, is policy gradient + def compute_loss(obs, act, weights): + logp = get_policy(obs).log_prob(act) + return -(logp * weights).mean() + + # make optimizer + optimizer = Adam(logits_net.parameters(), lr=lr) + + # for training policy + def train_one_epoch(): + # make some empty lists for logging. + batch_obs = [] # for observations + batch_acts = [] # for actions + batch_weights = [] # for R(tau) weighting in policy gradient + batch_rets = [] # for measuring episode returns + batch_lens = [] # for measuring episode lengths + + # reset episode-specific variables + obs = env.reset() # first obs comes from starting distribution + done = False # signal from environment that episode is over + ep_rews = [] # list for rewards accrued throughout ep + + # render first episode of each epoch + finished_rendering_this_epoch = False + + # collect experience by acting in the environment with current policy + while True: + + # rendering + if (not finished_rendering_this_epoch) and render: + env.render() + + # save obs + batch_obs.append(obs.copy()) + + # act in the environment + act = get_action(torch.as_tensor(obs, dtype=torch.float32)) + obs, rew, done, _ = env.step(act) + + # save action, reward + batch_acts.append(act) + ep_rews.append(rew) + + if done: + # if episode is over, record info about episode + ep_ret, ep_len = sum(ep_rews), len(ep_rews) + batch_rets.append(ep_ret) + batch_lens.append(ep_len) + + # the weight for each logprob(a|s) is R(tau) + batch_weights += [ep_ret] * ep_len + + # reset episode-specific variables + obs, done, ep_rews = env.reset(), False, [] + + # won't render again this epoch + finished_rendering_this_epoch = True + + # end experience loop if we have enough of it + if len(batch_obs) > batch_size: + break + + # take a single policy gradient update step + optimizer.zero_grad() + batch_loss = compute_loss(obs=torch.as_tensor(batch_obs, dtype=torch.float32), + act=torch.as_tensor(batch_acts, dtype=torch.int32), + weights=torch.as_tensor(batch_weights, dtype=torch.float32) + ) + batch_loss.backward() + optimizer.step() + return batch_loss, batch_rets, batch_lens + + # training loop + for i in range(epochs): + batch_loss, batch_rets, batch_lens = train_one_epoch() + print('epoch: %3d \t loss: %.3f \t return: %.3f \t ep_len: %.3f'% + (i, batch_loss, 
np.mean(batch_rets), np.mean(batch_lens))) + +if __name__ == '__main__': + import argparse + parser = argparse.ArgumentParser() + parser.add_argument('--env_name', '--env', type=str, default='CartPole-v0') + parser.add_argument('--render', action='store_true') + parser.add_argument('--lr', type=float, default=1e-2) + args = parser.parse_args() + print('\nUsing simplest formulation of policy gradient.\n') + train(env_name=args.env_name, render=args.render, lr=args.lr) \ No newline at end of file diff --git a/spinup/examples/pytorch/pg_math/2_rtg_pg.py b/spinup/examples/pytorch/pg_math/2_rtg_pg.py new file mode 100644 index 000000000..5fc5dcfd5 --- /dev/null +++ b/spinup/examples/pytorch/pg_math/2_rtg_pg.py @@ -0,0 +1,135 @@ +import torch +import torch.nn as nn +from torch.distributions.categorical import Categorical +from torch.optim import Adam +import numpy as np +import gym +from gym.spaces import Discrete, Box + +def mlp(sizes, activation=nn.Tanh, output_activation=nn.Identity): + # Build a feedforward neural network. + layers = [] + for j in range(len(sizes)-1): + act = activation if j < len(sizes)-2 else output_activation + layers += [nn.Linear(sizes[j], sizes[j+1]), act()] + return nn.Sequential(*layers) + +def reward_to_go(rews): + n = len(rews) + rtgs = np.zeros_like(rews) + for i in reversed(range(n)): + rtgs[i] = rews[i] + (rtgs[i+1] if i+1 < n else 0) + return rtgs + +def train(env_name='CartPole-v0', hidden_sizes=[32], lr=1e-2, + epochs=50, batch_size=5000, render=False): + + # make environment, check spaces, get obs / act dims + env = gym.make(env_name) + assert isinstance(env.observation_space, Box), \ + "This example only works for envs with continuous state spaces." + assert isinstance(env.action_space, Discrete), \ + "This example only works for envs with discrete action spaces." + + obs_dim = env.observation_space.shape[0] + n_acts = env.action_space.n + + # make core of policy network + logits_net = mlp(sizes=[obs_dim]+hidden_sizes+[n_acts]) + + # make function to compute action distribution + def get_policy(obs): + logits = logits_net(obs) + return Categorical(logits=logits) + + # make action selection function (outputs int actions, sampled from policy) + def get_action(obs): + return get_policy(obs).sample().item() + + # make loss function whose gradient, for the right data, is policy gradient + def compute_loss(obs, act, weights): + logp = get_policy(obs).log_prob(act) + return -(logp * weights).mean() + + # make optimizer + optimizer = Adam(logits_net.parameters(), lr=lr) + + # for training policy + def train_one_epoch(): + # make some empty lists for logging. 
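# Editor's aside (illustrative, not part of the original patch): the
# reward_to_go() helper defined above is the only substantive change relative
# to 1_simple_pg.py. Each timestep is weighted by the sum of rewards that come
# after it, rather than by the whole-episode return, e.g.
#     reward_to_go([1.0, 2.0, 3.0]) == [6.0, 5.0, 3.0]
assert np.allclose(reward_to_go([1.0, 2.0, 3.0]), [6.0, 5.0, 3.0])
# These per-timestep weights are what get appended to batch_weights below.
# End of editor's aside.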
+ batch_obs = [] # for observations + batch_acts = [] # for actions + batch_weights = [] # for reward-to-go weighting in policy gradient + batch_rets = [] # for measuring episode returns + batch_lens = [] # for measuring episode lengths + + # reset episode-specific variables + obs = env.reset() # first obs comes from starting distribution + done = False # signal from environment that episode is over + ep_rews = [] # list for rewards accrued throughout ep + + # render first episode of each epoch + finished_rendering_this_epoch = False + + # collect experience by acting in the environment with current policy + while True: + + # rendering + if (not finished_rendering_this_epoch) and render: + env.render() + + # save obs + batch_obs.append(obs.copy()) + + # act in the environment + act = get_action(torch.as_tensor(obs, dtype=torch.float32)) + obs, rew, done, _ = env.step(act) + + # save action, reward + batch_acts.append(act) + ep_rews.append(rew) + + if done: + # if episode is over, record info about episode + ep_ret, ep_len = sum(ep_rews), len(ep_rews) + batch_rets.append(ep_ret) + batch_lens.append(ep_len) + + # the weight for each logprob(a_t|s_t) is reward-to-go from t + batch_weights += list(reward_to_go(ep_rews)) + + # reset episode-specific variables + obs, done, ep_rews = env.reset(), False, [] + + # won't render again this epoch + finished_rendering_this_epoch = True + + # end experience loop if we have enough of it + if len(batch_obs) > batch_size: + break + + # take a single policy gradient update step + optimizer.zero_grad() + batch_loss = compute_loss(obs=torch.as_tensor(batch_obs, dtype=torch.float32), + act=torch.as_tensor(batch_acts, dtype=torch.int32), + weights=torch.as_tensor(batch_weights, dtype=torch.float32) + ) + batch_loss.backward() + optimizer.step() + return batch_loss, batch_rets, batch_lens + + # training loop + for i in range(epochs): + batch_loss, batch_rets, batch_lens = train_one_epoch() + print('epoch: %3d \t loss: %.3f \t return: %.3f \t ep_len: %.3f'% + (i, batch_loss, np.mean(batch_rets), np.mean(batch_lens))) + +if __name__ == '__main__': + import argparse + parser = argparse.ArgumentParser() + parser.add_argument('--env_name', '--env', type=str, default='CartPole-v0') + parser.add_argument('--render', action='store_true') + parser.add_argument('--lr', type=float, default=1e-2) + args = parser.parse_args() + print('\nUsing reward-to-go formulation of policy gradient.\n') + train(env_name=args.env_name, render=args.render, lr=args.lr) \ No newline at end of file diff --git a/spinup/examples/bench_ppo_cartpole.py b/spinup/examples/tf1/bench_ppo_cartpole.py similarity index 84% rename from spinup/examples/bench_ppo_cartpole.py rename to spinup/examples/tf1/bench_ppo_cartpole.py index 1c735664e..8f0f071a6 100644 --- a/spinup/examples/bench_ppo_cartpole.py +++ b/spinup/examples/tf1/bench_ppo_cartpole.py @@ -1,5 +1,5 @@ from spinup.utils.run_utils import ExperimentGrid -from spinup import ppo +from spinup import ppo_tf1 import tensorflow as tf if __name__ == '__main__': @@ -9,11 +9,11 @@ parser.add_argument('--num_runs', type=int, default=3) args = parser.parse_args() - eg = ExperimentGrid(name='ppo-bench') + eg = ExperimentGrid(name='ppo-tf1-bench') eg.add('env_name', 'CartPole-v0', '', True) eg.add('seed', [10*i for i in range(args.num_runs)]) eg.add('epochs', 10) eg.add('steps_per_epoch', 4000) eg.add('ac_kwargs:hidden_sizes', [(32,), (64,64)], 'hid') eg.add('ac_kwargs:activation', [tf.tanh, tf.nn.relu], '') - eg.run(ppo, num_cpu=args.cpu) \ No newline at 
end of file + eg.run(ppo_tf1, num_cpu=args.cpu) \ No newline at end of file diff --git a/spinup/examples/pg_math/1_simple_pg.py b/spinup/examples/tf1/pg_math/1_simple_pg.py similarity index 100% rename from spinup/examples/pg_math/1_simple_pg.py rename to spinup/examples/tf1/pg_math/1_simple_pg.py diff --git a/spinup/examples/pg_math/2_rtg_pg.py b/spinup/examples/tf1/pg_math/2_rtg_pg.py similarity index 100% rename from spinup/examples/pg_math/2_rtg_pg.py rename to spinup/examples/tf1/pg_math/2_rtg_pg.py diff --git a/spinup/examples/train_mnist.py b/spinup/examples/tf1/train_mnist.py similarity index 100% rename from spinup/examples/train_mnist.py rename to spinup/examples/tf1/train_mnist.py diff --git a/spinup/exercises/problem_set_2/exercise2_3.py b/spinup/exercises/problem_set_2/exercise2_3.py deleted file mode 100644 index 4df22f82b..000000000 --- a/spinup/exercises/problem_set_2/exercise2_3.py +++ /dev/null @@ -1,317 +0,0 @@ -import numpy as np -import tensorflow as tf -import gym -import time -from spinup.algos.td3 import core -from spinup.algos.td3.td3 import ReplayBuffer -from spinup.algos.td3.core import get_vars -from spinup.utils.logx import EpochLogger -from spinup.utils.run_utils import ExperimentGrid - - -""" - -Exercise 2.3: Details Matter - -In this exercise, you will run TD3 with a tiny implementation difference, -pertaining to how target actions are calculated. Your goal is to determine -whether or not there is any change in performance, and if so, explain why. - -You do NOT need to write code for this exercise. - -""" - -def td3(env_fn, actor_critic=core.mlp_actor_critic, ac_kwargs=dict(), seed=0, - steps_per_epoch=5000, epochs=100, replay_size=int(1e6), gamma=0.99, - polyak=0.995, pi_lr=1e-3, q_lr=1e-3, batch_size=100, start_steps=10000, - act_noise=0.1, target_noise=0.2, noise_clip=0.5, policy_delay=2, - max_ep_len=1000, logger_kwargs=dict(), save_freq=1, - remove_action_clip=False): - """ - - Args: - env_fn : A function which creates a copy of the environment. - The environment must satisfy the OpenAI Gym API. - - actor_critic: A function which takes in placeholder symbols - for state, ``x_ph``, and action, ``a_ph``, and returns the main - outputs from the agent's Tensorflow computation graph: - - =========== ================ ====================================== - Symbol Shape Description - =========== ================ ====================================== - ``pi`` (batch, act_dim) | Deterministically computes actions - | from policy given states. - ``q1`` (batch,) | Gives one estimate of Q* for - | states in ``x_ph`` and actions in - | ``a_ph``. - ``q2`` (batch,) | Gives another estimate of Q* for - | states in ``x_ph`` and actions in - | ``a_ph``. - ``q1_pi`` (batch,) | Gives the composition of ``q1`` and - | ``pi`` for states in ``x_ph``: - | q1(x, pi(x)). - =========== ================ ====================================== - - ac_kwargs (dict): Any kwargs appropriate for the actor_critic - function you provided to TD3. - - seed (int): Seed for random number generators. - - steps_per_epoch (int): Number of steps of interaction (state-action pairs) - for the agent and the environment in each epoch. - - epochs (int): Number of epochs to run and train agent. - - replay_size (int): Maximum length of replay buffer. - - gamma (float): Discount factor. (Always between 0 and 1.) - - polyak (float): Interpolation factor in polyak averaging for target - networks. Target networks are updated towards main networks - according to: - - .. 
math:: \\theta_{\\text{targ}} \\leftarrow - \\rho \\theta_{\\text{targ}} + (1-\\rho) \\theta - - where :math:`\\rho` is polyak. (Always between 0 and 1, usually - close to 1.) - - pi_lr (float): Learning rate for policy. - - q_lr (float): Learning rate for Q-networks. - - batch_size (int): Minibatch size for SGD. - - start_steps (int): Number of steps for uniform-random action selection, - before running real policy. Helps exploration. - - act_noise (float): Stddev for Gaussian exploration noise added to - policy at training time. (At test time, no noise is added.) - - target_noise (float): Stddev for smoothing noise added to target - policy. - - noise_clip (float): Limit for absolute value of target policy - smoothing noise. - - policy_delay (int): Policy will only be updated once every - policy_delay times for each update of the Q-networks. - - max_ep_len (int): Maximum length of trajectory / episode / rollout. - - logger_kwargs (dict): Keyword args for EpochLogger. - - save_freq (int): How often (in terms of gap between epochs) to save - the current policy and value function. - - remove_action_clip (bool): Special arg for this exercise. Controls - whether or not to clip the target action after adding noise to it. - - """ - - logger = EpochLogger(**logger_kwargs) - logger.save_config(locals()) - - tf.set_random_seed(seed) - np.random.seed(seed) - - env, test_env = env_fn(), env_fn() - obs_dim = env.observation_space.shape[0] - act_dim = env.action_space.shape[0] - - # Action limit for clamping: critically, assumes all dimensions share the same bound! - act_limit = env.action_space.high[0] - - # Share information about action space with policy architecture - ac_kwargs['action_space'] = env.action_space - - # Inputs to computation graph - x_ph, a_ph, x2_ph, r_ph, d_ph = core.placeholders(obs_dim, act_dim, obs_dim, None, None) - - # Main outputs from computation graph - with tf.variable_scope('main'): - pi, q1, q2, q1_pi = actor_critic(x_ph, a_ph, **ac_kwargs) - - # Target policy network - with tf.variable_scope('target'): - pi_targ, _, _, _ = actor_critic(x2_ph, a_ph, **ac_kwargs) - - # Target Q networks - with tf.variable_scope('target', reuse=True): - - # Target policy smoothing, by adding clipped noise to target actions - epsilon = tf.random_normal(tf.shape(pi_targ), stddev=target_noise) - epsilon = tf.clip_by_value(epsilon, -noise_clip, noise_clip) - a2 = pi_targ + epsilon - if not(remove_action_clip): - a2 = tf.clip_by_value(a2, -act_limit, act_limit) - - # Target Q-values, using action from target policy - _, q1_targ, q2_targ, _ = actor_critic(x2_ph, a2, **ac_kwargs) - - # Experience buffer - replay_buffer = ReplayBuffer(obs_dim=obs_dim, act_dim=act_dim, size=replay_size) - - # Count variables - var_counts = tuple(core.count_vars(scope) for scope in ['main/pi', 'main/q1', 'main/q2', 'main']) - print('\nNumber of parameters: \t pi: %d, \t q1: %d, \t q2: %d, \t total: %d\n'%var_counts) - - # Bellman backup for Q functions, using Clipped Double-Q targets - min_q_targ = tf.minimum(q1_targ, q2_targ) - backup = tf.stop_gradient(r_ph + gamma*(1-d_ph)*min_q_targ) - - # TD3 losses - pi_loss = -tf.reduce_mean(q1_pi) - q1_loss = tf.reduce_mean((q1-backup)**2) - q2_loss = tf.reduce_mean((q2-backup)**2) - q_loss = q1_loss + q2_loss - - # Separate train ops for pi, q - pi_optimizer = tf.train.AdamOptimizer(learning_rate=pi_lr) - q_optimizer = tf.train.AdamOptimizer(learning_rate=q_lr) - train_pi_op = pi_optimizer.minimize(pi_loss, var_list=get_vars('main/pi')) - train_q_op = 
q_optimizer.minimize(q_loss, var_list=get_vars('main/q')) - - # Polyak averaging for target variables - target_update = tf.group([tf.assign(v_targ, polyak*v_targ + (1-polyak)*v_main) - for v_main, v_targ in zip(get_vars('main'), get_vars('target'))]) - - # Initializing targets to match main variables - target_init = tf.group([tf.assign(v_targ, v_main) - for v_main, v_targ in zip(get_vars('main'), get_vars('target'))]) - - sess = tf.Session() - sess.run(tf.global_variables_initializer()) - sess.run(target_init) - - # Setup model saving - logger.setup_tf_saver(sess, inputs={'x': x_ph, 'a': a_ph}, outputs={'pi': pi, 'q1': q1, 'q2': q2}) - - def get_action(o, noise_scale): - a = sess.run(pi, feed_dict={x_ph: o.reshape(1,-1)}) - a += noise_scale * np.random.randn(act_dim) - return np.clip(a, -act_limit, act_limit) - - def test_agent(n=10): - for j in range(n): - o, r, d, ep_ret, ep_len = test_env.reset(), 0, False, 0, 0 - while not(d or (ep_len == max_ep_len)): - # Take deterministic actions at test time (noise_scale=0) - o, r, d, _ = test_env.step(get_action(o, 0)) - ep_ret += r - ep_len += 1 - logger.store(TestEpRet=ep_ret, TestEpLen=ep_len) - - start_time = time.time() - o, r, d, ep_ret, ep_len = env.reset(), 0, False, 0, 0 - total_steps = steps_per_epoch * epochs - - # Main loop: collect experience in env and update/log each epoch - for t in range(total_steps): - - """ - Until start_steps have elapsed, randomly sample actions - from a uniform distribution for better exploration. Afterwards, - use the learned policy (with some noise, via act_noise). - """ - if t > start_steps: - a = get_action(o, act_noise) - else: - a = env.action_space.sample() - - # Step the env - o2, r, d, _ = env.step(a) - ep_ret += r - ep_len += 1 - - # Ignore the "done" signal if it comes from hitting the time - # horizon (that is, when it's an artificial terminal signal - # that isn't based on the agent's state) - d = False if ep_len==max_ep_len else d - - # Store experience to replay buffer - replay_buffer.store(o, a, r, o2, d) - - # Super critical, easy to overlook step: make sure to update - # most recent observation! - o = o2 - - if d or (ep_len == max_ep_len): - """ - Perform all TD3 updates at the end of the trajectory - (in accordance with source code of TD3 published by - original authors). - """ - for j in range(ep_len): - batch = replay_buffer.sample_batch(batch_size) - feed_dict = {x_ph: batch['obs1'], - x2_ph: batch['obs2'], - a_ph: batch['acts'], - r_ph: batch['rews'], - d_ph: batch['done'] - } - q_step_ops = [q_loss, q1, q2, train_q_op] - outs = sess.run(q_step_ops, feed_dict) - logger.store(LossQ=outs[0], Q1Vals=outs[1], Q2Vals=outs[2]) - - if j % policy_delay == 0: - # Delayed policy update - outs = sess.run([pi_loss, train_pi_op, target_update], feed_dict) - logger.store(LossPi=outs[0]) - - logger.store(EpRet=ep_ret, EpLen=ep_len) - o, r, d, ep_ret, ep_len = env.reset(), 0, False, 0, 0 - - # End of epoch wrap-up - if t > 0 and t % steps_per_epoch == 0: - epoch = t // steps_per_epoch - - # Save model - if (epoch % save_freq == 0) or (epoch == epochs-1): - logger.save_state({'env': env}, None) - - # Test the performance of the deterministic version of the agent. 
- test_agent() - - # Log info about epoch - logger.log_tabular('Epoch', epoch) - logger.log_tabular('EpRet', with_min_and_max=True) - logger.log_tabular('TestEpRet', with_min_and_max=True) - logger.log_tabular('EpLen', average_only=True) - logger.log_tabular('TestEpLen', average_only=True) - logger.log_tabular('TotalEnvInteracts', t) - logger.log_tabular('Q1Vals', with_min_and_max=True) - logger.log_tabular('Q2Vals', with_min_and_max=True) - logger.log_tabular('LossPi', average_only=True) - logger.log_tabular('LossQ', average_only=True) - logger.log_tabular('Time', time.time()-start_time) - logger.dump_tabular() - -if __name__ == '__main__': - import argparse - parser = argparse.ArgumentParser() - parser.add_argument('--env', type=str, default='HalfCheetah-v2') - parser.add_argument('--h', type=int, default=300) - parser.add_argument('--l', type=int, default=1) - parser.add_argument('--num_runs', '-n', type=int, default=3) - parser.add_argument('--steps_per_epoch', '-s', type=int, default=5000) - parser.add_argument('--total_steps', '-t', type=int, default=int(5e4)) - args = parser.parse_args() - - def td3_with_actor_critic(**kwargs): - td3(ac_kwargs=dict(hidden_sizes=[args.h]*args.l), - start_steps=5000, - max_ep_len=150, - batch_size=64, - polyak=0.95, - **kwargs) - - eg = ExperimentGrid(name='ex2-3_td3') - eg.add('replay_size', int(args.total_steps)) - eg.add('env_name', args.env, '', True) - eg.add('seed', [10*i for i in range(args.num_runs)]) - eg.add('epochs', int(args.total_steps / args.steps_per_epoch)) - eg.add('steps_per_epoch', args.steps_per_epoch) - eg.add('remove_action_clip', [False, True]) - eg.run(td3_with_actor_critic, datestamp=True) \ No newline at end of file diff --git a/spinup/exercises/pytorch/problem_set_1/exercise1_1.py b/spinup/exercises/pytorch/problem_set_1/exercise1_1.py new file mode 100644 index 000000000..2590288ae --- /dev/null +++ b/spinup/exercises/pytorch/problem_set_1/exercise1_1.py @@ -0,0 +1,55 @@ +import torch +import numpy as np + +""" + +Exercise 1.1: Diagonal Gaussian Likelihood + +Write a function that takes in PyTorch Tensors for the means and +log stds of a batch of diagonal Gaussian distributions, along with a +PyTorch Tensor for (previously-generated) samples from those +distributions, and returns a Tensor containing the log +likelihoods of those samples. + +""" + +def gaussian_likelihood(x, mu, log_std): + """ + Args: + x: Tensor with shape [batch, dim] + mu: Tensor with shape [batch, dim] + log_std: Tensor with shape [batch, dim] or [dim] + + Returns: + Tensor with shape [batch] + """ + ####################### + # # + # YOUR CODE HERE # + # # + ####################### + return torch.zeros(1) + + +if __name__ == '__main__': + """ + Run this file to verify your solution. 
+ """ + from spinup.exercises.pytorch.problem_set_1_solutions import exercise1_1_soln + from spinup.exercises.common import print_result + + batch_size = 32 + dim = 10 + + x = torch.rand(batch_size, dim) + mu = torch.rand(batch_size, dim) + log_std = torch.rand(dim) + + your_gaussian_likelihood = gaussian_likelihood(x, mu, log_std) + true_gaussian_likelihood = exercise1_1_soln.gaussian_likelihood(x, mu, log_std) + + your_result = your_gaussian_likelihood.detach().numpy() + true_result = true_gaussian_likelihood.detach().numpy() + + correct = np.allclose(your_result, true_result) + print_result(correct) \ No newline at end of file diff --git a/spinup/exercises/pytorch/problem_set_1/exercise1_2.py b/spinup/exercises/pytorch/problem_set_1/exercise1_2.py new file mode 100644 index 000000000..ab01559bd --- /dev/null +++ b/spinup/exercises/pytorch/problem_set_1/exercise1_2.py @@ -0,0 +1,134 @@ +import torch +import torch.nn as nn +import numpy as np +from spinup.exercises.pytorch.problem_set_1 import exercise1_1 +from spinup.exercises.pytorch.problem_set_1 import exercise1_2_auxiliary + +""" + +Exercise 1.2: PPO Gaussian Policy + +You will implement an MLP diagonal Gaussian policy for PPO by +writing an MLP-builder, and a few other key functions. + +Log-likelihoods will be computed using your answer to Exercise 1.1, +so make sure to complete that exercise before beginning this one. + +""" + +def mlp(sizes, activation, output_activation=nn.Identity): + """ + Build a multi-layer perceptron in PyTorch. + + Args: + sizes: Tuple, list, or other iterable giving the number of units + for each layer of the MLP. + + activation: Activation function for all layers except last. + + output_activation: Activation function for last layer. + + Returns: + A PyTorch module that can be called to give the output of the MLP. + (Use an nn.Sequential module.) + + """ + ####################### + # # + # YOUR CODE HERE # + # # + ####################### + pass + +class DiagonalGaussianDistribution: + + def __init__(self, mu, log_std): + self.mu = mu + self.log_std = log_std + + def sample(self): + """ + Returns: + A PyTorch Tensor of samples from the diagonal Gaussian distribution with + mean and log_std given by self.mu and self.log_std. + """ + ####################### + # # + # YOUR CODE HERE # + # # + ####################### + pass + + #================================(Given, ignore)==========================================# + def log_prob(self, value): + return exercise1_1.gaussian_likelihood(value, self.mu, self.log_std) + + def entropy(self): + return 0.5 + 0.5 * np.log(2 * np.pi) + self.log_std.sum(axis=-1) + #=========================================================================================# + + +class MLPGaussianActor(nn.Module): + + def __init__(self, obs_dim, act_dim, hidden_sizes, activation): + super().__init__() + """ + Initialize an MLP Gaussian Actor by making a PyTorch module for computing the + mean of the distribution given a batch of observations, and a log_std parameter. + + Make log_std a PyTorch Parameter with the same shape as the action vector, + independent of observations, initialized to [-0.5, -0.5, ..., -0.5]. + (Make sure it's trainable!) 
+ """ + ####################### + # # + # YOUR CODE HERE # + # # + ####################### + # self.log_std = + # self.mu_net = + pass + + #================================(Given, ignore)==========================================# + def forward(self, obs, act=None): + mu = self.mu_net(obs) + pi = DiagonalGaussianDistribution(mu, self.log_std) + logp_a = None + if act is not None: + logp_a = pi.log_prob(act) + return pi, logp_a + #=========================================================================================# + + + +if __name__ == '__main__': + """ + Run this file to verify your solution. + """ + + from spinup import ppo_pytorch as ppo + from spinup.exercises.common import print_result + from functools import partial + import gym + import os + import pandas as pd + import psutil + import time + + logdir = "/tmp/experiments/%i"%int(time.time()) + + ActorCritic = partial(exercise1_2_auxiliary.ExerciseActorCritic, actor=MLPGaussianActor) + + ppo(env_fn = lambda : gym.make('InvertedPendulum-v2'), + actor_critic=ActorCritic, + ac_kwargs=dict(hidden_sizes=(64,)), + steps_per_epoch=4000, epochs=20, logger_kwargs=dict(output_dir=logdir)) + + # Get scores from last five epochs to evaluate success. + data = pd.read_table(os.path.join(logdir,'progress.txt')) + last_scores = data['AverageEpRet'][-5:] + + # Your implementation is probably correct if the agent has a score >500, + # or if it reaches the top possible score of 1000, in the last five epochs. + correct = np.mean(last_scores) > 500 or np.max(last_scores)==1e3 + print_result(correct) \ No newline at end of file diff --git a/spinup/exercises/pytorch/problem_set_1/exercise1_2_auxiliary.py b/spinup/exercises/pytorch/problem_set_1/exercise1_2_auxiliary.py new file mode 100644 index 000000000..958502c6e --- /dev/null +++ b/spinup/exercises/pytorch/problem_set_1/exercise1_2_auxiliary.py @@ -0,0 +1,54 @@ +import torch +import torch.nn as nn +import numpy as np + +""" + +Auxiliary code for Exercise 1.2. No part of the exercise requires you to +look into or modify this file (and since it contains an mlp function, +it has spoilers for the answer). Removed from the main file to avoid +cluttering it up. + +In other words, nothing to see here, move along, these are not the +droids you're looking for, and all that... + +""" + +def mlp(sizes, activation, output_activation=nn.Identity): + layers = [] + for j in range(len(sizes)-1): + act = activation if j < len(sizes)-2 else output_activation + layers += [nn.Linear(sizes[j], sizes[j+1]), act()] + return nn.Sequential(*layers) + + +class MLPCritic(nn.Module): + + def __init__(self, obs_dim, hidden_sizes, activation): + super().__init__() + self.v_net = mlp([obs_dim] + list(hidden_sizes) + [1], activation) + + def forward(self, obs): + return torch.squeeze(self.v_net(obs), -1) # Critical to ensure v has right shape. 
+ + +class ExerciseActorCritic(nn.Module): + + def __init__(self, observation_space, action_space, + hidden_sizes=(64,64), activation=nn.Tanh, + actor=None): + super().__init__() + obs_dim = observation_space.shape[0] + self.pi = actor(obs_dim, action_space.shape[0], hidden_sizes, activation) + self.v = MLPCritic(obs_dim, hidden_sizes, activation) + + def step(self, obs): + with torch.no_grad(): + pi, _ = self.pi(obs) + a = pi.sample() + logp_a = pi.log_prob(a) + v = self.v(obs) + return a.numpy(), v.numpy(), logp_a.numpy() + + def act(self, obs): + return self.step(obs)[0] \ No newline at end of file diff --git a/spinup/exercises/pytorch/problem_set_1/exercise1_3.py b/spinup/exercises/pytorch/problem_set_1/exercise1_3.py new file mode 100644 index 000000000..d2fca5c0d --- /dev/null +++ b/spinup/exercises/pytorch/problem_set_1/exercise1_3.py @@ -0,0 +1,416 @@ +from copy import deepcopy +import itertools +import numpy as np +import torch +from torch.optim import Adam +import gym +import time +import spinup.algos.pytorch.td3.core as core +from spinup.algos.pytorch.td3.td3 import td3 as true_td3 +from spinup.utils.logx import EpochLogger + +""" + +Exercise 1.3: TD3 Computation Graph + +Implement the core computation graph for the TD3 algorithm. + +As starter code, you are given the entirety of the TD3 algorithm except +for the computation graph. Find "YOUR CODE HERE" to begin. + +To clarify: you will not write an "actor_critic" function for this +exercise. But you will use one to build the graph for computing the +TD3 updates. + +""" + +class ReplayBuffer: + """ + A simple FIFO experience replay buffer for TD3 agents. + """ + + def __init__(self, obs_dim, act_dim, size): + self.obs_buf = np.zeros(core.combined_shape(size, obs_dim), dtype=np.float32) + self.obs2_buf = np.zeros(core.combined_shape(size, obs_dim), dtype=np.float32) + self.act_buf = np.zeros(core.combined_shape(size, act_dim), dtype=np.float32) + self.rew_buf = np.zeros(size, dtype=np.float32) + self.done_buf = np.zeros(size, dtype=np.float32) + self.ptr, self.size, self.max_size = 0, 0, size + + def store(self, obs, act, rew, next_obs, done): + self.obs_buf[self.ptr] = obs + self.obs2_buf[self.ptr] = next_obs + self.act_buf[self.ptr] = act + self.rew_buf[self.ptr] = rew + self.done_buf[self.ptr] = done + self.ptr = (self.ptr+1) % self.max_size + self.size = min(self.size+1, self.max_size) + + def sample_batch(self, batch_size=32): + idxs = np.random.randint(0, self.size, size=batch_size) + batch = dict(obs=self.obs_buf[idxs], + obs2=self.obs2_buf[idxs], + act=self.act_buf[idxs], + rew=self.rew_buf[idxs], + done=self.done_buf[idxs]) + return {k: torch.as_tensor(v, dtype=torch.float32) for k,v in batch.items()} + + + +def td3(env_fn, actor_critic=core.MLPActorCritic, ac_kwargs=dict(), seed=0, + steps_per_epoch=4000, epochs=100, replay_size=int(1e6), gamma=0.99, + polyak=0.995, pi_lr=1e-3, q_lr=1e-3, batch_size=100, start_steps=10000, + update_after=1000, update_every=50, act_noise=0.1, target_noise=0.2, + noise_clip=0.5, policy_delay=2, num_test_episodes=10, max_ep_len=1000, + logger_kwargs=dict(), save_freq=1): + """ + Twin Delayed Deep Deterministic Policy Gradient (TD3) + + + Args: + env_fn : A function which creates a copy of the environment. + The environment must satisfy the OpenAI Gym API. + + actor_critic: The constructor method for a PyTorch Module with an ``act`` + method, a ``pi`` module, a ``q1`` module, and a ``q2`` module. 
+ The ``act`` method and ``pi`` module should accept batches of + observations as inputs, and ``q1`` and ``q2`` should accept a batch + of observations and a batch of actions as inputs. When called, + these should return: + + =========== ================ ====================================== + Call Output Shape Description + =========== ================ ====================================== + ``act`` (batch, act_dim) | Numpy array of actions for each + | observation. + ``pi`` (batch, act_dim) | Tensor containing actions from policy + | given observations. + ``q1`` (batch,) | Tensor containing one current estimate + | of Q* for the provided observations + | and actions. (Critical: make sure to + | flatten this!) + ``q2`` (batch,) | Tensor containing the other current + | estimate of Q* for the provided observations + | and actions. (Critical: make sure to + | flatten this!) + =========== ================ ====================================== + + ac_kwargs (dict): Any kwargs appropriate for the ActorCritic object + you provided to TD3. + + seed (int): Seed for random number generators. + + steps_per_epoch (int): Number of steps of interaction (state-action pairs) + for the agent and the environment in each epoch. + + epochs (int): Number of epochs to run and train agent. + + replay_size (int): Maximum length of replay buffer. + + gamma (float): Discount factor. (Always between 0 and 1.) + + polyak (float): Interpolation factor in polyak averaging for target + networks. Target networks are updated towards main networks + according to: + + .. math:: \\theta_{\\text{targ}} \\leftarrow + \\rho \\theta_{\\text{targ}} + (1-\\rho) \\theta + + where :math:`\\rho` is polyak. (Always between 0 and 1, usually + close to 1.) + + pi_lr (float): Learning rate for policy. + + q_lr (float): Learning rate for Q-networks. + + batch_size (int): Minibatch size for SGD. + + start_steps (int): Number of steps for uniform-random action selection, + before running real policy. Helps exploration. + + update_after (int): Number of env interactions to collect before + starting to do gradient descent updates. Ensures replay buffer + is full enough for useful updates. + + update_every (int): Number of env interactions that should elapse + between gradient descent updates. Note: Regardless of how long + you wait between updates, the ratio of env steps to gradient steps + is locked to 1. + + act_noise (float): Stddev for Gaussian exploration noise added to + policy at training time. (At test time, no noise is added.) + + target_noise (float): Stddev for smoothing noise added to target + policy. + + noise_clip (float): Limit for absolute value of target policy + smoothing noise. + + policy_delay (int): Policy will only be updated once every + policy_delay times for each update of the Q-networks. + + num_test_episodes (int): Number of episodes to test the deterministic + policy at the end of each epoch. + + max_ep_len (int): Maximum length of trajectory / episode / rollout. + + logger_kwargs (dict): Keyword args for EpochLogger. + + save_freq (int): How often (in terms of gap between epochs) to save + the current policy and value function. + + """ + + logger = EpochLogger(**logger_kwargs) + logger.save_config(locals()) + + torch.manual_seed(seed) + np.random.seed(seed) + + env, test_env = env_fn(), env_fn() + obs_dim = env.observation_space.shape + act_dim = env.action_space.shape[0] + + # Action limit for clamping: critically, assumes all dimensions share the same bound! 
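# Editor's aside (a sanity check one could add here; not part of the original
# patch): the next line keeps only the bound of the first action dimension,
# so this presumes a symmetric Box space whose dimensions all share one limit.
assert np.allclose(env.action_space.high, env.action_space.high[0]), \
    "TD3 here assumes every action dimension has the same bound"
assert np.allclose(env.action_space.low, -env.action_space.high), \
    "TD3 here assumes a symmetric action range [-act_limit, act_limit]"
# End of editor's aside.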
+ act_limit = env.action_space.high[0] + + # Create actor-critic module and target networks + ac = actor_critic(env.observation_space, env.action_space, **ac_kwargs) + ac_targ = deepcopy(ac) + + # Freeze target networks with respect to optimizers (only update via polyak averaging) + for p in ac_targ.parameters(): + p.requires_grad = False + + # List of parameters for both Q-networks (save this for convenience) + q_params = itertools.chain(ac.q1.parameters(), ac.q2.parameters()) + + # Experience buffer + replay_buffer = ReplayBuffer(obs_dim=obs_dim, act_dim=act_dim, size=replay_size) + + # Count variables (protip: try to get a feel for how different size networks behave!) + var_counts = tuple(core.count_vars(module) for module in [ac.pi, ac.q1, ac.q2]) + logger.log('\nNumber of parameters: \t pi: %d, \t q1: %d, \t q2: %d\n'%var_counts) + + + #=========================================================================# + # # + # All of your code goes in the space below. # + # # + #=========================================================================# + + # Set up function for computing TD3 Q-losses + def compute_loss_q(data): + o, a, r, o2, d = data['obs'], data['act'], data['rew'], data['obs2'], data['done'] + + # Q-values + ####################### + # # + # YOUR CODE HERE # + # # + ####################### + # q1 = + # q2 = + + # Target policy smoothing + ####################### + # # + # YOUR CODE HERE # + # # + ####################### + + # Target Q-values + ####################### + # # + # YOUR CODE HERE # + # # + ####################### + + # MSE loss against Bellman backup + ####################### + # # + # YOUR CODE HERE # + # # + ####################### + # loss_q1 = + # loss_q2 = + # loss_q = + + # Useful info for logging + loss_info = dict(Q1Vals=q1.detach().numpy(), + Q2Vals=q2.detach().numpy()) + + return loss_q, loss_info + + # Set up function for computing TD3 pi loss + def compute_loss_pi(data): + ####################### + # # + # YOUR CODE HERE # + # # + ####################### + # loss_pi = + return loss_pi + + #=========================================================================# + # # + # All of your code goes in the space above. # + # # + #=========================================================================# + + # Set up optimizers for policy and q-function + pi_optimizer = Adam(ac.pi.parameters(), lr=pi_lr) + q_optimizer = Adam(q_params, lr=q_lr) + + # Set up model saving + logger.setup_pytorch_saver(ac) + + def update(data, timer): + # First run one gradient descent step for Q1 and Q2 + q_optimizer.zero_grad() + loss_q, loss_info = compute_loss_q(data) + loss_q.backward() + q_optimizer.step() + + # Record things + logger.store(LossQ=loss_q.item(), **loss_info) + + # Possibly update pi and target networks + if timer % policy_delay == 0: + + # Freeze Q-networks so you don't waste computational effort + # computing gradients for them during the policy learning step. + for p in q_params: + p.requires_grad = False + + # Next run one gradient descent step for pi. + pi_optimizer.zero_grad() + loss_pi = compute_loss_pi(data) + loss_pi.backward() + pi_optimizer.step() + + # Unfreeze Q-networks so you can optimize it at next DDPG step. + for p in q_params: + p.requires_grad = True + + # Record things + logger.store(LossPi=loss_pi.item()) + + # Finally, update target networks by polyak averaging. 
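# Editor's aside (not part of the original patch): the block below implements
#     theta_targ <- polyak * theta_targ + (1 - polyak) * theta
# for every parameter tensor, i.e. an exponential moving average of the main
# network with a time constant of roughly 1 / (1 - polyak) = 200 updates at
# the default polyak=0.995. An equivalent, if more allocation-heavy, form of
# the in-place update inside the loop would be:
#     p_targ.data.copy_(polyak * p_targ.data + (1 - polyak) * p.data)
# End of editor's aside.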
+ with torch.no_grad(): + for p, p_targ in zip(ac.parameters(), ac_targ.parameters()): + # NB: We use an in-place operations "mul_", "add_" to update target + # params, as opposed to "mul" and "add", which would make new tensors. + p_targ.data.mul_(polyak) + p_targ.data.add_((1 - polyak) * p.data) + + def get_action(o, noise_scale): + a = ac.act(torch.as_tensor(o, dtype=torch.float32)) + a += noise_scale * np.random.randn(act_dim) + return np.clip(a, -act_limit, act_limit) + + def test_agent(): + for j in range(num_test_episodes): + o, d, ep_ret, ep_len = test_env.reset(), False, 0, 0 + while not(d or (ep_len == max_ep_len)): + # Take deterministic actions at test time (noise_scale=0) + o, r, d, _ = test_env.step(get_action(o, 0)) + ep_ret += r + ep_len += 1 + logger.store(TestEpRet=ep_ret, TestEpLen=ep_len) + + # Prepare for interaction with environment + total_steps = steps_per_epoch * epochs + start_time = time.time() + o, ep_ret, ep_len = env.reset(), 0, 0 + + # Main loop: collect experience in env and update/log each epoch + for t in range(total_steps): + + # Until start_steps have elapsed, randomly sample actions + # from a uniform distribution for better exploration. Afterwards, + # use the learned policy (with some noise, via act_noise). + if t > start_steps: + a = get_action(o, act_noise) + else: + a = env.action_space.sample() + + # Step the env + o2, r, d, _ = env.step(a) + ep_ret += r + ep_len += 1 + + # Ignore the "done" signal if it comes from hitting the time + # horizon (that is, when it's an artificial terminal signal + # that isn't based on the agent's state) + d = False if ep_len==max_ep_len else d + + # Store experience to replay buffer + replay_buffer.store(o, a, r, o2, d) + + # Super critical, easy to overlook step: make sure to update + # most recent observation! + o = o2 + + # End of trajectory handling + if d or (ep_len == max_ep_len): + logger.store(EpRet=ep_ret, EpLen=ep_len) + o, ep_ret, ep_len = env.reset(), 0, 0 + + # Update handling + if t >= update_after and t % update_every == 0: + for j in range(update_every): + batch = replay_buffer.sample_batch(batch_size) + update(data=batch, timer=j) + + # End of epoch handling + if (t+1) % steps_per_epoch == 0: + epoch = (t+1) // steps_per_epoch + + # Save model + if (epoch % save_freq == 0) or (epoch == epochs): + logger.save_state({'env': env}, None) + + # Test the performance of the deterministic version of the agent. 
+ test_agent() + + # Log info about epoch + logger.log_tabular('Epoch', epoch) + logger.log_tabular('EpRet', with_min_and_max=True) + logger.log_tabular('TestEpRet', with_min_and_max=True) + logger.log_tabular('EpLen', average_only=True) + logger.log_tabular('TestEpLen', average_only=True) + logger.log_tabular('TotalEnvInteracts', t) + logger.log_tabular('Q1Vals', with_min_and_max=True) + logger.log_tabular('Q2Vals', with_min_and_max=True) + logger.log_tabular('LossPi', average_only=True) + logger.log_tabular('LossQ', average_only=True) + logger.log_tabular('Time', time.time()-start_time) + logger.dump_tabular() + +if __name__ == '__main__': + import argparse + parser = argparse.ArgumentParser() + parser.add_argument('--env', type=str, default='HalfCheetah-v2') + parser.add_argument('--seed', '-s', type=int, default=0) + parser.add_argument('--exp_name', type=str, default='ex13-td3') + parser.add_argument('--use_soln', action='store_true') + args = parser.parse_args() + + from spinup.utils.run_utils import setup_logger_kwargs + logger_kwargs = setup_logger_kwargs(args.exp_name + '-' + args.env.lower(), args.seed) + + all_kwargs = dict( + env_fn=lambda : gym.make(args.env), + actor_critic=core.MLPActorCritic, + ac_kwargs=dict(hidden_sizes=[128,128]), + max_ep_len=150, + seed=args.seed, + logger_kwargs=logger_kwargs, + epochs=10 + ) + + if args.use_soln: + true_td3(**all_kwargs) + else: + td3(**all_kwargs) diff --git a/spinup/exercises/pytorch/problem_set_1_solutions/exercise1_1_soln.py b/spinup/exercises/pytorch/problem_set_1_solutions/exercise1_1_soln.py new file mode 100644 index 000000000..1bb183a47 --- /dev/null +++ b/spinup/exercises/pytorch/problem_set_1_solutions/exercise1_1_soln.py @@ -0,0 +1,8 @@ +import torch +import numpy as np + +EPS=1e-8 + +def gaussian_likelihood(x, mu, log_std): + pre_sum = -0.5 * (((x-mu)/(torch.exp(log_std)+EPS))**2 + 2*log_std + np.log(2*np.pi)) + return pre_sum.sum(axis=-1) \ No newline at end of file diff --git a/spinup/exercises/pytorch/problem_set_1_solutions/exercise1_2_soln.py b/spinup/exercises/pytorch/problem_set_1_solutions/exercise1_2_soln.py new file mode 100644 index 000000000..4a85f685a --- /dev/null +++ b/spinup/exercises/pytorch/problem_set_1_solutions/exercise1_2_soln.py @@ -0,0 +1,49 @@ +import torch +import torch.nn as nn +import numpy as np + +EPS=1e-8 + +def mlp(sizes, activation, output_activation=nn.Identity): + layers = [] + for j in range(len(sizes)-1): + act = activation if j < len(sizes)-2 else output_activation + layers += [nn.Linear(sizes[j], sizes[j+1]), act()] + return nn.Sequential(*layers) + +def gaussian_likelihood(x, mu, log_std): + pre_sum = -0.5 * (((x-mu)/(torch.exp(log_std)+EPS))**2 + 2*log_std + np.log(2*np.pi)) + return pre_sum.sum(axis=-1) + + +class DiagonalGaussianDistribution: + + def __init__(self, mu, log_std): + self.mu = mu + self.log_std = log_std + + def sample(self): + return self.mu + torch.exp(self.log_std) * torch.randn_like(self.mu) + + def log_prob(self, value): + return gaussian_likelihood(value, self.mu, self.log_std) + + def entropy(self): + return 0.5 + 0.5 * np.log(2 * np.pi) + self.log_std.sum(axis=-1) + + +class MLPGaussianActor(nn.Module): + + def __init__(self, obs_dim, act_dim, hidden_sizes, activation): + super().__init__() + log_std = -0.5 * np.ones(act_dim, dtype=np.float32) + self.log_std = torch.nn.Parameter(torch.as_tensor(log_std)) + self.mu_net = mlp([obs_dim] + list(hidden_sizes) + [act_dim], activation) + + def forward(self, obs, act=None): + mu = self.mu_net(obs) + pi = 
DiagonalGaussianDistribution(mu, self.log_std)
+        logp_a = None
+        if act is not None:
+            logp_a = pi.log_prob(act)
+        return pi, logp_a
\ No newline at end of file
diff --git a/spinup/exercises/pytorch/problem_set_2/exercise2_2.py b/spinup/exercises/pytorch/problem_set_2/exercise2_2.py
new file mode 100644
index 000000000..4909da386
--- /dev/null
+++ b/spinup/exercises/pytorch/problem_set_2/exercise2_2.py
@@ -0,0 +1,93 @@
+from spinup.algos.pytorch.ddpg.core import mlp, MLPActorCritic
+from spinup.utils.run_utils import ExperimentGrid
+from spinup import ddpg_pytorch as ddpg
+import numpy as np
+import torch
+import torch.nn as nn
+
+"""
+
+Exercise 2.2: Silent Bug in DDPG (PyTorch Version)
+
+In this exercise, you will run DDPG with a bugged actor critic. Your goal is
+to determine whether or not there is any performance degradation, and if so,
+figure out what's going wrong.
+
+You do NOT need to write code for this exercise.
+
+"""
+
+"""
+Bugged Actor-Critic
+"""
+
+class BuggedMLPActor(nn.Module):
+
+    def __init__(self, obs_dim, act_dim, hidden_sizes, activation, act_limit):
+        super().__init__()
+        pi_sizes = [obs_dim] + list(hidden_sizes) + [act_dim]
+        self.pi = mlp(pi_sizes, activation, nn.Tanh)
+        self.act_limit = act_limit
+
+    def forward(self, obs):
+        # Return output from network scaled to action space limits.
+        return self.act_limit * self.pi(obs)
+
+class BuggedMLPQFunction(nn.Module):
+
+    def __init__(self, obs_dim, act_dim, hidden_sizes, activation):
+        super().__init__()
+        self.q = mlp([obs_dim + act_dim] + list(hidden_sizes) + [1], activation)
+
+    def forward(self, obs, act):
+        return self.q(torch.cat([obs, act], dim=-1))
+
+class BuggedMLPActorCritic(nn.Module):
+
+    def __init__(self, observation_space, action_space, hidden_sizes=(256,256),
+                 activation=nn.ReLU):
+        super().__init__()
+
+        obs_dim = observation_space.shape[0]
+        act_dim = action_space.shape[0]
+        act_limit = action_space.high[0]
+
+        # build policy and value functions
+        self.pi = BuggedMLPActor(obs_dim, act_dim, hidden_sizes, activation, act_limit)
+        self.q = BuggedMLPQFunction(obs_dim, act_dim, hidden_sizes, activation)
+
+    def act(self, obs):
+        with torch.no_grad():
+            return self.pi(obs).numpy()
+
+
+if __name__ == '__main__':
+    import argparse
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--env', type=str, default='HalfCheetah-v2')
+    parser.add_argument('--h', type=int, default=300)
+    parser.add_argument('--l', type=int, default=1)
+    parser.add_argument('--num_runs', '-n', type=int, default=3)
+    parser.add_argument('--steps_per_epoch', '-s', type=int, default=5000)
+    parser.add_argument('--total_steps', '-t', type=int, default=int(5e4))
+    args = parser.parse_args()
+
+    def ddpg_with_actor_critic(bugged, **kwargs):
+        from spinup.exercises.pytorch.problem_set_2.exercise2_2 import BuggedMLPActorCritic
+        actor_critic = BuggedMLPActorCritic if bugged else MLPActorCritic
+        return ddpg(actor_critic=actor_critic,
+                    ac_kwargs=dict(hidden_sizes=[args.h]*args.l),
+                    start_steps=5000,
+                    max_ep_len=150,
+                    batch_size=64,
+                    polyak=0.95,
+                    **kwargs)
+
+    eg = ExperimentGrid(name='ex2-2_ddpg')
+    eg.add('replay_size', int(args.total_steps))
+    eg.add('env_name', args.env, '', True)
+    eg.add('seed', [10*i for i in range(args.num_runs)])
+    eg.add('epochs', int(args.total_steps / args.steps_per_epoch))
+    eg.add('steps_per_epoch', args.steps_per_epoch)
+    eg.add('bugged', [False, True])
+    eg.run(ddpg_with_actor_critic, datestamp=True)
\ No newline at end of file
diff --git a/spinup/exercises/problem_set_1/exercise1_1.py
b/spinup/exercises/tf1/problem_set_1/exercise1_1.py similarity index 95% rename from spinup/exercises/problem_set_1/exercise1_1.py rename to spinup/exercises/tf1/problem_set_1/exercise1_1.py index c52febe79..a05a55f61 100644 --- a/spinup/exercises/problem_set_1/exercise1_1.py +++ b/spinup/exercises/tf1/problem_set_1/exercise1_1.py @@ -35,7 +35,7 @@ def gaussian_likelihood(x, mu, log_std): """ Run this file to verify your solution. """ - from spinup.exercises.problem_set_1_solutions import exercise1_1_soln + from spinup.exercises.tf1.problem_set_1_solutions import exercise1_1_soln from spinup.exercises.common import print_result sess = tf.Session() diff --git a/spinup/exercises/problem_set_1/exercise1_2.py b/spinup/exercises/tf1/problem_set_1/exercise1_2.py similarity index 97% rename from spinup/exercises/problem_set_1/exercise1_2.py rename to spinup/exercises/tf1/problem_set_1/exercise1_2.py index 9d2569fef..05a2de916 100644 --- a/spinup/exercises/problem_set_1/exercise1_2.py +++ b/spinup/exercises/tf1/problem_set_1/exercise1_2.py @@ -1,6 +1,6 @@ import tensorflow as tf import numpy as np -from spinup.exercises.problem_set_1 import exercise1_1 +from spinup.exercises.tf1.problem_set_1 import exercise1_1 """ @@ -91,7 +91,7 @@ def mlp_gaussian_policy(x, a, hidden_sizes, activation, output_activation, actio Run this file to verify your solution. """ - from spinup import ppo + from spinup import ppo_tf1 as ppo from spinup.exercises.common import print_result import gym import os diff --git a/spinup/exercises/problem_set_1/exercise1_3.py b/spinup/exercises/tf1/problem_set_1/exercise1_3.py similarity index 88% rename from spinup/exercises/problem_set_1/exercise1_3.py rename to spinup/exercises/tf1/problem_set_1/exercise1_3.py index ca8da96b8..2d073ef03 100644 --- a/spinup/exercises/problem_set_1/exercise1_3.py +++ b/spinup/exercises/tf1/problem_set_1/exercise1_3.py @@ -2,9 +2,9 @@ import tensorflow as tf import gym import time -from spinup.algos.td3 import core -from spinup.algos.td3.td3 import td3 as true_td3 -from spinup.algos.td3.core import get_vars +from spinup.algos.tf1.td3 import core +from spinup.algos.tf1.td3.td3 import td3 as true_td3 +from spinup.algos.tf1.td3.core import get_vars from spinup.utils.logx import EpochLogger """ @@ -52,17 +52,18 @@ def sample_batch(self, batch_size=32): rews=self.rews_buf[idxs], done=self.done_buf[idxs]) -""" -TD3 (Twin Delayed DDPG) -""" + def td3(env_fn, actor_critic=core.mlp_actor_critic, ac_kwargs=dict(), seed=0, - steps_per_epoch=5000, epochs=100, replay_size=int(1e6), gamma=0.99, + steps_per_epoch=4000, epochs=100, replay_size=int(1e6), gamma=0.99, polyak=0.995, pi_lr=1e-3, q_lr=1e-3, batch_size=100, start_steps=10000, - act_noise=0.1, target_noise=0.2, noise_clip=0.5, policy_delay=2, - max_ep_len=1000, logger_kwargs=dict(), save_freq=1): + update_after=1000, update_every=50, act_noise=0.1, target_noise=0.2, + noise_clip=0.5, policy_delay=2, num_test_episodes=10, max_ep_len=1000, + logger_kwargs=dict(), save_freq=1): """ + Twin Delayed Deep Deterministic Policy Gradient (TD3) + Args: env_fn : A function which creates a copy of the environment. @@ -121,6 +122,15 @@ def td3(env_fn, actor_critic=core.mlp_actor_critic, ac_kwargs=dict(), seed=0, start_steps (int): Number of steps for uniform-random action selection, before running real policy. Helps exploration. + update_after (int): Number of env interactions to collect before + starting to do gradient descent updates. Ensures replay buffer + is full enough for useful updates. 
+ + update_every (int): Number of env interactions that should elapse + between gradient descent updates. Note: Regardless of how long + you wait between updates, the ratio of env steps to gradient steps + is locked to 1. + act_noise (float): Stddev for Gaussian exploration noise added to policy at training time. (At test time, no noise is added.) @@ -133,6 +143,9 @@ def td3(env_fn, actor_critic=core.mlp_actor_critic, ac_kwargs=dict(), seed=0, policy_delay (int): Policy will only be updated once every policy_delay times for each update of the Q-networks. + num_test_episodes (int): Number of episodes to test the deterministic + policy at the end of each epoch. + max_ep_len (int): Maximum length of trajectory / episode / rollout. logger_kwargs (dict): Keyword args for EpochLogger. @@ -262,9 +275,9 @@ def get_action(o, noise_scale): a += noise_scale * np.random.randn(act_dim) return np.clip(a, -act_limit, act_limit) - def test_agent(n=10): - for j in range(n): - o, r, d, ep_ret, ep_len = test_env.reset(), 0, False, 0, 0 + def test_agent(): + for j in range(num_test_episodes): + o, d, ep_ret, ep_len = test_env.reset(), False, 0, 0 while not(d or (ep_len == max_ep_len)): # Take deterministic actions at test time (noise_scale=0) o, r, d, _ = test_env.step(get_action(o, 0)) @@ -273,17 +286,15 @@ def test_agent(n=10): logger.store(TestEpRet=ep_ret, TestEpLen=ep_len) start_time = time.time() - o, r, d, ep_ret, ep_len = env.reset(), 0, False, 0, 0 + o, ep_ret, ep_len = env.reset(), 0, 0 total_steps = steps_per_epoch * epochs # Main loop: collect experience in env and update/log each epoch for t in range(total_steps): - """ - Until start_steps have elapsed, randomly sample actions - from a uniform distribution for better exploration. Afterwards, - use the learned policy (with some noise, via act_noise). - """ + # Until start_steps have elapsed, randomly sample actions + # from a uniform distribution for better exploration. Afterwards, + # use the learned policy (with some noise, via act_noise). if t > start_steps: a = get_action(o, act_noise) else: @@ -306,13 +317,14 @@ def test_agent(n=10): # most recent observation! o = o2 + # End of trajectory handling if d or (ep_len == max_ep_len): - """ - Perform all TD3 updates at the end of the trajectory - (in accordance with source code of TD3 published by - original authors). - """ - for j in range(ep_len): + logger.store(EpRet=ep_ret, EpLen=ep_len) + o, ep_ret, ep_len = env.reset(), 0, 0 + + # Update handling + if t >= update_after and t % update_every == 0: + for j in range(update_every): batch = replay_buffer.sample_batch(batch_size) feed_dict = {x_ph: batch['obs1'], x2_ph: batch['obs2'], @@ -329,15 +341,12 @@ def test_agent(n=10): outs = sess.run([pi_loss, train_pi_op, target_update], feed_dict) logger.store(LossPi=outs[0]) - logger.store(EpRet=ep_ret, EpLen=ep_len) - o, r, d, ep_ret, ep_len = env.reset(), 0, False, 0, 0 - # End of epoch wrap-up - if t > 0 and t % steps_per_epoch == 0: - epoch = t // steps_per_epoch + if (t+1) % steps_per_epoch == 0: + epoch = (t+1) // steps_per_epoch # Save model - if (epoch % save_freq == 0) or (epoch == epochs-1): + if (epoch % save_freq == 0) or (epoch == epochs): logger.save_state({'env': env}, None) # Test the performance of the deterministic version of the agent. 
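The update_after / update_every schedule introduced above replaces the old end-of-trajectory updates while keeping the ratio of env steps to gradient steps locked to 1, as the docstring notes. A minimal sketch of that accounting (not part of the patch), assuming the default values from the signature above; num_updates is a hypothetical helper for illustration only:

def num_updates(total_steps, update_after=1000, update_every=50):
    # Mirrors the trigger in the main loop above: every update_every env steps
    # (once t reaches update_after), perform update_every gradient updates.
    n = 0
    for t in range(total_steps):
        if t >= update_after and t % update_every == 0:
            n += update_every
    return n

# With the defaults, num_updates(10000) == 9000: one gradient step per env step
# after the initial update_after warmup window.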
diff --git a/spinup/exercises/problem_set_1_solutions/exercise1_1_soln.py b/spinup/exercises/tf1/problem_set_1_solutions/exercise1_1_soln.py similarity index 100% rename from spinup/exercises/problem_set_1_solutions/exercise1_1_soln.py rename to spinup/exercises/tf1/problem_set_1_solutions/exercise1_1_soln.py diff --git a/spinup/exercises/problem_set_1_solutions/exercise1_2_soln.py b/spinup/exercises/tf1/problem_set_1_solutions/exercise1_2_soln.py similarity index 100% rename from spinup/exercises/problem_set_1_solutions/exercise1_2_soln.py rename to spinup/exercises/tf1/problem_set_1_solutions/exercise1_2_soln.py diff --git a/spinup/exercises/problem_set_2/exercise2_2.py b/spinup/exercises/tf1/problem_set_2/exercise2_2.py similarity index 96% rename from spinup/exercises/problem_set_2/exercise2_2.py rename to spinup/exercises/tf1/problem_set_2/exercise2_2.py index 2e31727a1..2220f1ae5 100644 --- a/spinup/exercises/problem_set_2/exercise2_2.py +++ b/spinup/exercises/tf1/problem_set_2/exercise2_2.py @@ -1,6 +1,6 @@ -from spinup.algos.ddpg.core import mlp, mlp_actor_critic +from spinup.algos.tf1.ddpg.core import mlp, mlp_actor_critic from spinup.utils.run_utils import ExperimentGrid -from spinup import ddpg +from spinup import ddpg_tf1 as ddpg import numpy as np import tensorflow as tf diff --git a/spinup/run.py b/spinup/run.py index e3ecfabfd..9b8526e88 100644 --- a/spinup/run.py +++ b/spinup/run.py @@ -1,4 +1,5 @@ import spinup +from spinup.user_config import DEFAULT_BACKEND from spinup.utils.run_utils import ExperimentGrid from spinup.utils.serialization_utils import convert_json import argparse @@ -8,6 +9,8 @@ import os.path as osp import string import tensorflow as tf +import torch +from copy import deepcopy from textwrap import dedent @@ -25,6 +28,17 @@ # Only some algorithms can be parallelized (have num_cpu > 1): MPI_COMPATIBLE_ALGOS = ['vpg', 'trpo', 'ppo'] +# Algo names (used in a few places) +BASE_ALGO_NAMES = ['vpg', 'trpo', 'ppo', 'ddpg', 'td3', 'sac'] + + +def add_with_backends(algo_list): + # helper function to build lists with backend-specific function names + algo_list_with_backends = deepcopy(algo_list) + for algo in algo_list: + algo_list_with_backends += [algo + '_tf1', algo + '_pytorch'] + return algo_list_with_backends + def friendly_err(err_msg): # add whitespace to error message to make it more readable @@ -34,7 +48,11 @@ def friendly_err(err_msg): def parse_and_execute_grid_search(cmd, args): """Interprets algorithm name and cmd line args into an ExperimentGrid.""" - # Parse which algorithm to execute + if cmd in BASE_ALGO_NAMES: + backend = DEFAULT_BACKEND[cmd] + print('\n\nUsing default backend (%s) for %s.\n'%(backend, cmd)) + cmd = cmd + '_' + backend + algo = eval('spinup.'+cmd) # Before all else, check to see if any of the flags is 'help'. @@ -130,7 +148,7 @@ def process(arg): # Make sure that if num_cpu > 1, the algorithm being used is compatible # with MPI. 
if 'num_cpu' in run_kwargs and not(run_kwargs['num_cpu'] == 1): - assert cmd in MPI_COMPATIBLE_ALGOS, \ + assert cmd in add_with_backends(MPI_COMPATIBLE_ALGOS), \ friendly_err("This algorithm can't be run with num_cpu > 1.") # Special handling for environment: make sure that env_name is a real, @@ -175,7 +193,7 @@ def process(arg): """ cmd = sys.argv[1] if len(sys.argv) > 1 else 'help' - valid_algos = ['vpg', 'trpo', 'ppo', 'ddpg', 'td3', 'sac'] + valid_algos = add_with_backends(BASE_ALGO_NAMES) valid_utils = ['plot', 'test_policy'] valid_help = ['--help', '-h', 'help'] valid_cmds = valid_algos + valid_utils + valid_help diff --git a/spinup/user_config.py b/spinup/user_config.py index 5ada7858d..a5dd6864d 100644 --- a/spinup/user_config.py +++ b/spinup/user_config.py @@ -1,6 +1,17 @@ import os import os.path as osp +# Default neural network backend for each algo +# (Must be either 'tf1' or 'pytorch') +DEFAULT_BACKEND = { + 'vpg': 'pytorch', + 'trpo': 'tf1', + 'ppo': 'pytorch', + 'ddpg': 'pytorch', + 'td3': 'pytorch', + 'sac': 'pytorch' +} + # Where experiment outputs are saved by default: DEFAULT_DATA_DIR = osp.join(osp.abspath(osp.dirname(osp.dirname(__file__))),'data') diff --git a/spinup/utils/logx.py b/spinup/utils/logx.py index 11e998890..281b736bd 100644 --- a/spinup/utils/logx.py +++ b/spinup/utils/logx.py @@ -10,7 +10,9 @@ import shutil import numpy as np import tensorflow as tf +import torch import os.path as osp, time, atexit, os +import warnings from spinup.utils.mpi_tools import proc_id, mpi_statistics_scalar from spinup.utils.serialization_utils import convert_json @@ -186,6 +188,8 @@ def save_state(self, state_dict, itr=None): self.log('Warning: could not pickle state_dict.', color='red') if hasattr(self, 'tf_saver_elements'): self._tf_simple_save(itr) + if hasattr(self, 'pytorch_saver_elements'): + self._pytorch_simple_save(itr) def setup_tf_saver(self, sess, inputs, outputs): """ @@ -217,7 +221,7 @@ def _tf_simple_save(self, itr=None): if proc_id()==0: assert hasattr(self, 'tf_saver_elements'), \ "First have to setup saving with self.setup_tf_saver" - fpath = 'simple_save' + ('%d'%itr if itr is not None else '') + fpath = 'tf1_save' + ('%d'%itr if itr is not None else '') fpath = osp.join(self.output_dir, fpath) if osp.exists(fpath): # simple_save refuses to be useful if fpath already exists, @@ -226,6 +230,48 @@ def _tf_simple_save(self, itr=None): tf.saved_model.simple_save(export_dir=fpath, **self.tf_saver_elements) joblib.dump(self.tf_saver_info, osp.join(fpath, 'model_info.pkl')) + + def setup_pytorch_saver(self, what_to_save): + """ + Set up easy model saving for a single PyTorch model. + + Because PyTorch saving and loading is especially painless, this is + very minimal; we just need references to whatever we would like to + pickle. This is integrated into the logger because the logger + knows where the user would like to save information about this + training run. + + Args: + what_to_save: Any PyTorch model or serializable object containing + PyTorch models. + """ + self.pytorch_saver_elements = what_to_save + + def _pytorch_simple_save(self, itr=None): + """ + Saves the PyTorch model (or models). 
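+
+        The model is written to 'pyt_save/model<itr>.pt' (just 'model.pt' when
+        itr is None) under the logger output directory, and can be loaded back
+        with torch.load, which is what test_policy.load_pytorch_policy does.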
+ """ + if proc_id()==0: + assert hasattr(self, 'pytorch_saver_elements'), \ + "First have to setup saving with self.setup_pytorch_saver" + fpath = 'pyt_save' + fpath = osp.join(self.output_dir, fpath) + fname = 'model' + ('%d'%itr if itr is not None else '') + '.pt' + fname = osp.join(fpath, fname) + os.makedirs(fpath, exist_ok=True) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + # We are using a non-recommended way of saving PyTorch models, + # by pickling whole objects (which are dependent on the exact + # directory structure at the time of saving) as opposed to + # just saving network weights. This works sufficiently well + # for the purposes of Spinning Up, but you may want to do + # something different for your personal PyTorch project. + # We use a catch_warnings() context to avoid the warnings about + # not being able to save the source code. + torch.save(self.pytorch_saver_elements, fname) + + def dump_tabular(self): """ Write all of the diagnostics from the current iteration. @@ -245,7 +291,7 @@ def dump_tabular(self): valstr = "%8.3g"%val if hasattr(val, "__float__") else val print(fmt%(key, valstr)) vals.append(val) - print("-"*n_slashes) + print("-"*n_slashes, flush=True) if self.output_file is not None: if self.first_row: self.output_file.write("\t".join(self.log_headers)+"\n") diff --git a/spinup/utils/mpi_pytorch.py b/spinup/utils/mpi_pytorch.py new file mode 100644 index 000000000..1b46e4efb --- /dev/null +++ b/spinup/utils/mpi_pytorch.py @@ -0,0 +1,35 @@ +import multiprocessing +import numpy as np +import os +import torch +from mpi4py import MPI +from spinup.utils.mpi_tools import broadcast, mpi_avg, num_procs, proc_id + +def setup_pytorch_for_mpi(): + """ + Avoid slowdowns caused by each separate process's PyTorch using + more than its fair share of CPU resources. + """ + #print('Proc %d: Reporting original number of Torch threads as %d.'%(proc_id(), torch.get_num_threads()), flush=True) + if torch.get_num_threads()==1: + return + fair_num_threads = max(int(torch.get_num_threads() / num_procs()), 1) + torch.set_num_threads(fair_num_threads) + #print('Proc %d: Reporting new number of Torch threads as %d.'%(proc_id(), torch.get_num_threads()), flush=True) + +def mpi_avg_grads(module): + """ Average contents of gradient buffers across MPI processes. """ + if num_procs()==1: + return + for p in module.parameters(): + p_grad_numpy = p.grad.numpy() # numpy view of tensor data + avg_p_grad = mpi_avg(p.grad) + p_grad_numpy[:] = avg_p_grad[:] + +def sync_params(module): + """ Sync all parameters of module across all MPI processes. """ + if num_procs()==1: + return + for p in module.parameters(): + p_numpy = p.data.numpy() + broadcast(p_numpy) \ No newline at end of file diff --git a/spinup/utils/plot.py b/spinup/utils/plot.py index 3824e2d65..036cabf4f 100644 --- a/spinup/utils/plot.py +++ b/spinup/utils/plot.py @@ -40,6 +40,8 @@ def plot_data(data, xaxis='Epoch', value="AverageEpRet", condition="Condition1", Changes the colorscheme and the default legend style, though. 
""" plt.legend(loc='best').set_draggable(True) + #plt.legend(loc='upper center', ncol=3, handlelength=1, + # borderaxespad=0., prop={'size': 13}) """ For the version of the legend used in the Spinning Up benchmarking page, diff --git a/spinup/utils/test_policy.py b/spinup/utils/test_policy.py index 8c97f1409..fe5d20ca3 100644 --- a/spinup/utils/test_policy.py +++ b/spinup/utils/test_policy.py @@ -3,21 +3,76 @@ import os import os.path as osp import tensorflow as tf +import torch from spinup import EpochLogger from spinup.utils.logx import restore_tf_graph -def load_policy(fpath, itr='last', deterministic=False): + +def load_policy_and_env(fpath, itr='last', deterministic=False): + """ + Load a policy from save, whether it's TF or PyTorch, along with RL env. + + Not exceptionally future-proof, but it will suffice for basic uses of the + Spinning Up implementations. + + Checks to see if there's a tf1_save folder. If yes, assumes the model + is tensorflow and loads it that way. Otherwise, loads as if there's a + PyTorch save. + """ + + # determine if tf save or pytorch save + if any(['tf1_save' in x for x in os.listdir(fpath)]): + backend = 'tf1' + else: + backend = 'pytorch' # handle which epoch to load from if itr=='last': - saves = [int(x[11:]) for x in os.listdir(fpath) if 'simple_save' in x and len(x)>11] + # check filenames for epoch (AKA iteration) numbers, find maximum value + + if backend == 'tf1': + saves = [int(x[8:]) for x in os.listdir(fpath) if 'tf1_save' in x and len(x)>8] + + elif backend == 'pytorch': + pytsave_path = osp.join(fpath, 'pyt_save') + # Each file in this folder has naming convention 'modelXX.pt', where + # 'XX' is either an integer or empty string. Empty string case + # corresponds to len(x)==8, hence that case is excluded. + saves = [int(x.split('.')[0][5:]) for x in os.listdir(pytsave_path) if len(x)>8 and 'model' in x] + itr = '%d'%max(saves) if len(saves) > 0 else '' + else: + assert isinstance(itr, int), \ + "Bad value provided for itr (needs to be int or 'last')." itr = '%d'%itr + # load the get_action function + if backend == 'tf1': + get_action = load_tf_policy(fpath, itr, deterministic) + else: + get_action = load_pytorch_policy(fpath, itr, deterministic) + + # try to load environment from save + # (sometimes this will fail because the environment could not be pickled) + try: + state = joblib.load(osp.join(fpath, 'vars'+itr+'.pkl')) + env = state['env'] + except: + env = None + + return env, get_action + + +def load_tf_policy(fpath, itr, deterministic=False): + """ Load a tensorflow policy saved with Spinning Up Logger.""" + + fname = osp.join(fpath, 'tf1_save'+itr) + print('\n\nLoading from %s.\n\n'%fname) + # load the things! 
sess = tf.Session() - model = restore_tf_graph(sess, osp.join(fpath, 'simple_save'+itr)) + model = restore_tf_graph(sess, fname) # get the correct op for executing actions if deterministic and 'mu' in model.keys(): @@ -31,15 +86,25 @@ def load_policy(fpath, itr='last', deterministic=False): # make function for producing an action given a single state get_action = lambda x : sess.run(action_op, feed_dict={model['x']: x[None,:]})[0] - # try to load environment from save - # (sometimes this will fail because the environment could not be pickled) - try: - state = joblib.load(osp.join(fpath, 'vars'+itr+'.pkl')) - env = state['env'] - except: - env = None + return get_action - return env, get_action + +def load_pytorch_policy(fpath, itr, deterministic=False): + """ Load a pytorch policy saved with Spinning Up Logger.""" + + fname = osp.join(fpath, 'pyt_save', 'model'+itr+'.pt') + print('\n\nLoading from %s.\n\n'%fname) + + model = torch.load(fname) + + # make function for producing an action given a single state + def get_action(x): + with torch.no_grad(): + x = torch.as_tensor(x, dtype=torch.float32) + action = model.act(x) + return action + + return get_action def run_policy(env, get_action, max_ep_len=None, num_episodes=100, render=True): @@ -82,7 +147,7 @@ def run_policy(env, get_action, max_ep_len=None, num_episodes=100, render=True): parser.add_argument('--itr', '-i', type=int, default=-1) parser.add_argument('--deterministic', '-d', action='store_true') args = parser.parse_args() - env, get_action = load_policy(args.fpath, - args.itr if args.itr >=0 else 'last', - args.deterministic) + env, get_action = load_policy_and_env(args.fpath, + args.itr if args.itr >=0 else 'last', + args.deterministic) run_policy(env, get_action, args.len, args.episodes, not(args.norender)) \ No newline at end of file diff --git a/spinup/version.py b/spinup/version.py index 9b8990464..1a6eee978 100644 --- a/spinup/version.py +++ b/spinup/version.py @@ -1,4 +1,4 @@ -version_info = (0, 1, 3) +version_info = (0, 2, 0) # format: # ('spinup_major', 'spinup_minor', 'spinup_patch')
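For reference, a short usage sketch (not part of the patch) showing how the updated test_policy utilities fit together; the experiment path below is a placeholder for whatever output directory a run actually produced:

from spinup.utils.test_policy import load_policy_and_env, run_policy

# 'data/my_exp/my_exp_s0' is hypothetical; point this at a real save directory.
env, get_action = load_policy_and_env('data/my_exp/my_exp_s0', itr='last')
run_policy(env, get_action, max_ep_len=1000, num_episodes=5, render=False)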